qemu/target-i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"


#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
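
/* For illustration: a ModRM byte packs mod (bits 7..6), a register or
 * opcode-extension field (bits 5..3) and rm (bits 2..0).  E.g. the
 * (hypothetical) byte 0x65 = 01 100 101b has mod=1, OP=4, rm=5, so it is
 * caught by CASE_MODRM_MEM_OP(4); only mod=3 (register operands) is
 * excluded from the _MEM_ variant.
 */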

//#define MACRO_TEST   1

/* global register indexes */
static TCGv_env cpu_env;
static TCGv cpu_A0;
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];
/* local temps */
static TCGv cpu_T0, cpu_T1;
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
};
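
/* For illustration: flags are evaluated lazily.  An add records only its
 * operands and a CC_OP value, e.g. after "add %ebx, %eax":
 *   CC_OP = CC_OP_ADDL, CC_DST = result, CC_SRC = addend;
 * the carry can then be recovered on demand as
 *   CF = (uint32_t)CC_DST < (uint32_t)CC_SRC
 * (see gen_prepare_eflags_c below).  cc_op_live[] records which of these
 * globals still matter for each CC_OP, so dead ones can be discarded.
 */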

static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return true;
}
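
/* For illustration: with reg = 5 and no REX prefix this returns true and
 * the operand is CH (bits 15..8 of ECX); under a REX prefix
 * (x86_64_hregs set) the same encoding instead selects BPL, the low
 * 8 bits of RBP, so the function returns false.
 */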

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline TCGMemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(reg)) {
        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
        tcg_gen_ext8u_tl(t0, t0);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T0);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_reg_v(idx, d, cpu_T0);
    }
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(cpu_A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                tcg_gen_ext32u_tl(cpu_A0, a0);
                return;
            }
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(cpu_A0, a0);
        a0 = cpu_A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(cpu_A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(cpu_A0, a0);
            tcg_gen_add_tl(cpu_A0, cpu_A0, seg);
        } else {
            tcg_gen_add_tl(cpu_A0, a0, seg);
            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
        }
    }
}
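
/* For illustration: with addseg set, a 16-bit DS-relative access computes
 *   A0 = (seg_base[R_DS] + zero_extend16(a0)) & 0xffffffff,
 * i.e. the segment base is added and the sum wraps at 32 bits outside
 * 64-bit mode, while the MO_64 path adds the base without truncation.
 */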

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
{
    tcg_gen_ld32s_tl(cpu_T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(cpu_T0, cpu_T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_SVMI_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T0, cpu_T1);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T0);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;
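
/* For illustration: a CCPrepare describes a condition over the lazy flag
 * state without forcing its evaluation.  E.g. eflags.P below is encoded
 * as { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_P }, meaning
 * "(cc_src & CC_P) != 0"; mask == -1 means no masking is needed, and
 * no_setcond means reg already holds the 0/1 result itself.
 */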

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}
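
/* For illustration: after a subtraction, CC_SRCT holds the original left
 * operand, so e.g. "sub $5, %al" with al = 3 yields carry because
 * (uint8_t)CC_SRCT (3) < (uint8_t)CC_SRC (5); no flag register is ever
 * materialized for the common cmp/jcc pairing.
 */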

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}
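
/* For illustration: bit 0 of 'b' inverts the condition and bits 3..1
 * select the base condition from the JCC_* enum; e.g. for jnz (opcode
 * 0x75) inv = 1 and jcc_op = JCC_Z, so the prepared condition is
 * "ZF != 1", i.e. TCG_COND_EQ inverted to TCG_COND_NE as appropriate.
 */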

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T0, cc.reg, cc.mask);
        cc.reg = cpu_T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}

static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T0, 0);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T0, cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T0, cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T0, cpu_A0);

    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T0);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction. */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
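
/* For illustration, each REP iteration expands roughly to:
 *   if (ECX == 0) goto next_insn;
 *   <one string op>; ECX--;          (the REPZ2 forms also test ZF vs nz)
 *   if (repz_opt && ECX == 0) goto next_insn;
 *   goto current_insn;               (re-enter this rep instruction)
 * so interrupts and single-step exceptions can be taken between
 * iterations rather than inside a closed loop.
 */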

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, cpu_T1);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T0, cpu_T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T0, d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T0, cpu_A0);
    }
    gen_compute_eflags_c(s1, cpu_cc_src);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T0, cpu_T0, 1);
        set_cc_op(s1, CC_OP_INCB + ot);
    } else {
        tcg_gen_addi_tl(cpu_T0, cpu_T0, -1);
        set_cc_op(s1, CC_OP_DECB + ot);
    }
    gen_op_st_rm_T0_A0(s1, ot, d);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
}
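
/* For illustration: INC/DEC must preserve CF, so the old carry is first
 * materialized as a 0/1 value in CC_SRC before CC_OP_INCB + ot (or
 * CC_OP_DECB + ot) is set; when the flags are later read, CF is taken
 * back from CC_SRC while the other bits are recomputed from CC_DST.
 */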

static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T0);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            gen_extu(ot, cpu_T0);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
            tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
        tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, cpu_T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T0, op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T0);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_sari_tl(cpu_T0, cpu_T0, op2);
            } else {
                gen_extu(ot, cpu_T0);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T0, op2 - 1);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T0, op2 - 1);
            tcg_gen_shli_tl(cpu_T0, cpu_T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T0, op1);
    }

    tcg_gen_andi_tl(cpu_T1, cpu_T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
        tcg_gen_muli_tl(cpu_T0, cpu_T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T0, cpu_T0, cpu_T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T0, cpu_T0, cpu_T1);
        } else {
            tcg_gen_rotl_tl(cpu_T0, cpu_T0, cpu_T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, cpu_T1);
    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        cpu_tmp2_i32, cpu_tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
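
/* For illustration (32-bit rotate): after ROL, CF is bit 0 of the result
 * and OF = CF ^ bit 31; after ROR, CF is bit 31 and OF = bit 31 ^ bit 30.
 * The sequence above places exactly these bits in CC_DST (C) and
 * CC_SRC2 (O) on top of the already-computed CC_SRC flags.
 */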
1546
1547static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1548                          int is_right)
1549{
1550    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1551    int shift;
1552
1553    /* load */
1554    if (op1 == OR_TMP0) {
1555        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1556    } else {
1557        gen_op_mov_v_reg(ot, cpu_T0, op1);
1558    }
1559
1560    op2 &= mask;
1561    if (op2 != 0) {
1562        switch (ot) {
1563#ifdef TARGET_X86_64
1564        case MO_32:
1565            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
1566            if (is_right) {
1567                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1568            } else {
1569                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1570            }
1571            tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
1572            break;
1573#endif
1574        default:
1575            if (is_right) {
1576                tcg_gen_rotri_tl(cpu_T0, cpu_T0, op2);
1577            } else {
1578                tcg_gen_rotli_tl(cpu_T0, cpu_T0, op2);
1579            }
1580            break;
1581        case MO_8:
1582            mask = 7;
1583            goto do_shifts;
1584        case MO_16:
1585            mask = 15;
1586        do_shifts:
1587            shift = op2 & mask;
1588            if (is_right) {
1589                shift = mask + 1 - shift;
1590            }
1591            gen_extu(ot, cpu_T0);
1592            tcg_gen_shli_tl(cpu_tmp0, cpu_T0, shift);
1593            tcg_gen_shri_tl(cpu_T0, cpu_T0, mask + 1 - shift);
1594            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0);
1595            break;
1596        }
1597    }
1598
1599    /* store */
1600    gen_op_st_rm_T0_A0(s, ot, op1);
1601
1602    if (op2 != 0) {
1603        /* Compute the flags into CC_SRC.  */
1604        gen_compute_eflags(s);
1605
1606        /* The value that was "rotated out" is now present at the other end
1607           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1608           since we've computed the flags into CC_SRC, these variables are
1609           currently dead.  */
1610        if (is_right) {
1611            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask - 1);
1612            tcg_gen_shri_tl(cpu_cc_dst, cpu_T0, mask);
1613            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1614        } else {
1615            tcg_gen_shri_tl(cpu_cc_src2, cpu_T0, mask);
1616            tcg_gen_andi_tl(cpu_cc_dst, cpu_T0, 1);
1617        }
1618        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1619        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1620        set_cc_op(s, CC_OP_ADCOX);
1621    }
1622}
1623
1624/* XXX: add faster immediate = 1 case */
1625static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1626                           int is_right)
1627{
1628    gen_compute_eflags(s);
1629    assert(s->cc_op == CC_OP_EFLAGS);
1630
1631    /* load */
1632    if (op1 == OR_TMP0)
1633        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1634    else
1635        gen_op_mov_v_reg(ot, cpu_T0, op1);
1636    
1637    if (is_right) {
1638        switch (ot) {
1639        case MO_8:
1640            gen_helper_rcrb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1641            break;
1642        case MO_16:
1643            gen_helper_rcrw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1644            break;
1645        case MO_32:
1646            gen_helper_rcrl(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1647            break;
1648#ifdef TARGET_X86_64
1649        case MO_64:
1650            gen_helper_rcrq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1651            break;
1652#endif
1653        default:
1654            tcg_abort();
1655        }
1656    } else {
1657        switch (ot) {
1658        case MO_8:
1659            gen_helper_rclb(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1660            break;
1661        case MO_16:
1662            gen_helper_rclw(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1663            break;
1664        case MO_32:
1665            gen_helper_rcll(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1666            break;
1667#ifdef TARGET_X86_64
1668        case MO_64:
1669            gen_helper_rclq(cpu_T0, cpu_env, cpu_T0, cpu_T1);
1670            break;
1671#endif
1672        default:
1673            tcg_abort();
1674        }
1675    }
1676    /* store */
1677    gen_op_st_rm_T0_A0(s, ot, op1);
1678}
1679
1680/* XXX: add faster immediate case */
1681static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1682                             bool is_right, TCGv count_in)
1683{
1684    target_ulong mask = (ot == MO_64 ? 63 : 31);
1685    TCGv count;
1686
1687    /* load */
1688    if (op1 == OR_TMP0) {
1689        gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
1690    } else {
1691        gen_op_mov_v_reg(ot, cpu_T0, op1);
1692    }
1693
1694    count = tcg_temp_new();
1695    tcg_gen_andi_tl(count, count_in, mask);
1696
1697    switch (ot) {
1698    case MO_16:
1699        /* Note: we implement the Intel behaviour for shift count > 16.
1700           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1701           portion by constructing it as a 32-bit value.  */
1702        if (is_right) {
1703            tcg_gen_deposit_tl(cpu_tmp0, cpu_T0, cpu_T1, 16, 16);
1704            tcg_gen_mov_tl(cpu_T1, cpu_T0);
1705            tcg_gen_mov_tl(cpu_T0, cpu_tmp0);
1706        } else {
1707            tcg_gen_deposit_tl(cpu_T1, cpu_T0, cpu_T1, 16, 16);
1708        }
1709        /* FALLTHRU */
1710#ifdef TARGET_X86_64
1711    case MO_32:
1712        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1713        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1714        if (is_right) {
1715            tcg_gen_concat_tl_i64(cpu_T0, cpu_T0, cpu_T1);
1716            tcg_gen_shr_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1717            tcg_gen_shr_i64(cpu_T0, cpu_T0, count);
1718        } else {
1719            tcg_gen_concat_tl_i64(cpu_T0, cpu_T1, cpu_T0);
1720            tcg_gen_shl_i64(cpu_tmp0, cpu_T0, cpu_tmp0);
1721            tcg_gen_shl_i64(cpu_T0, cpu_T0, count);
1722            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1723            tcg_gen_shri_i64(cpu_T0, cpu_T0, 32);
1724        }
1725        break;
1726#endif
1727    default:
1728        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1729        if (is_right) {
1730            tcg_gen_shr_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1731
1732            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1733            tcg_gen_shr_tl(cpu_T0, cpu_T0, count);
1734            tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_tmp4);
1735        } else {
1736            tcg_gen_shl_tl(cpu_tmp0, cpu_T0, cpu_tmp0);
1737            if (ot == MO_16) {
1738                /* Only needed if count > 16, for Intel behaviour.  */
1739                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1740                tcg_gen_shr_tl(cpu_tmp4, cpu_T1, cpu_tmp4);
1741                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1742            }
1743
1744            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1745            tcg_gen_shl_tl(cpu_T0, cpu_T0, count);
1746            tcg_gen_shr_tl(cpu_T1, cpu_T1, cpu_tmp4);
1747        }
1748        tcg_gen_movi_tl(cpu_tmp4, 0);
1749        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T1, count, cpu_tmp4,
1750                           cpu_tmp4, cpu_T1);
1751        tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
1752        break;
1753    }
1754
1755    /* store */
1756    gen_op_st_rm_T0_A0(s, ot, op1);
1757
1758    gen_shift_flags(s, ot, cpu_T0, cpu_tmp0, count, is_right);
1759    tcg_temp_free(count);
1760}
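/* Example: a 32-bit "shrd eax, ebx, cl" on an x86-64 host concatenates
   the operands into the 64-bit value EBX:EAX, performs one 64-bit
   shift right by the masked count, and keeps the low half as the
   result; the same shift by count-1 is kept in tmp0 so that
   gen_shift_flags() can recover the last bit shifted out for CF.  The
   generic path above synthesizes the same result with two opposing
   shifts and an OR when no double-width type is available.  */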
1761
1762static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1763{
1764    if (s != OR_TMP1)
1765        gen_op_mov_v_reg(ot, cpu_T1, s);
1766    switch(op) {
1767    case OP_ROL:
1768        gen_rot_rm_T1(s1, ot, d, 0);
1769        break;
1770    case OP_ROR:
1771        gen_rot_rm_T1(s1, ot, d, 1);
1772        break;
1773    case OP_SHL:
1774    case OP_SHL1:
1775        gen_shift_rm_T1(s1, ot, d, 0, 0);
1776        break;
1777    case OP_SHR:
1778        gen_shift_rm_T1(s1, ot, d, 1, 0);
1779        break;
1780    case OP_SAR:
1781        gen_shift_rm_T1(s1, ot, d, 1, 1);
1782        break;
1783    case OP_RCL:
1784        gen_rotc_rm_T1(s1, ot, d, 0);
1785        break;
1786    case OP_RCR:
1787        gen_rotc_rm_T1(s1, ot, d, 1);
1788        break;
1789    }
1790}
1791
1792static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1793{
1794    switch(op) {
1795    case OP_ROL:
1796        gen_rot_rm_im(s1, ot, d, c, 0);
1797        break;
1798    case OP_ROR:
1799        gen_rot_rm_im(s1, ot, d, c, 1);
1800        break;
1801    case OP_SHL:
1802    case OP_SHL1:
1803        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1804        break;
1805    case OP_SHR:
1806        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1807        break;
1808    case OP_SAR:
1809        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1810        break;
1811    default:
1812        /* currently not optimized */
1813        tcg_gen_movi_tl(cpu_T1, c);
1814        gen_shift(s1, op, ot, d, OR_TMP1);
1815        break;
1816    }
1817}
1818
1819/* Decompose an address.  */
1820
1821typedef struct AddressParts {
1822    int def_seg;
1823    int base;
1824    int index;
1825    int scale;
1826    target_long disp;
1827} AddressParts;
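/* An x86 memory operand decomposes as seg:[base + index * 2^scale + disp].
   For example "mov eax, [ebp + esi*4 + 8]" yields base = R_EBP,
   index = R_ESI, scale = 2, disp = 8 and def_seg = R_SS, since
   EBP/ESP-based addresses default to the stack segment unless a prefix
   overrides it.  base == -1 means no base register; base == -2 (64-bit
   mode only) marks a RIP-relative address.  */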
1828
1829static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1830                                    int modrm)
1831{
1832    int def_seg, base, index, scale, mod, rm;
1833    target_long disp;
1834    bool havesib;
1835
1836    def_seg = R_DS;
1837    index = -1;
1838    scale = 0;
1839    disp = 0;
1840
1841    mod = (modrm >> 6) & 3;
1842    rm = modrm & 7;
1843    base = rm | REX_B(s);
1844
1845    if (mod == 3) {
1846        /* Normally filtered out earlier, but including this path
1847           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1848        goto done;
1849    }
1850
1851    switch (s->aflag) {
1852    case MO_64:
1853    case MO_32:
1854        havesib = 0;
1855        if (rm == 4) {
1856            int code = cpu_ldub_code(env, s->pc++);
1857            scale = (code >> 6) & 3;
1858            index = ((code >> 3) & 7) | REX_X(s);
1859            if (index == 4) {
1860                index = -1;  /* no index */
1861            }
1862            base = (code & 7) | REX_B(s);
1863            havesib = 1;
1864        }
1865
1866        switch (mod) {
1867        case 0:
1868            if ((base & 7) == 5) {
1869                base = -1;
1870                disp = (int32_t)cpu_ldl_code(env, s->pc);
1871                s->pc += 4;
1872                if (CODE64(s) && !havesib) {
1873                    base = -2;
1874                    disp += s->pc + s->rip_offset;
1875                }
1876            }
1877            break;
1878        case 1:
1879            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1880            break;
1881        default:
1882        case 2:
1883            disp = (int32_t)cpu_ldl_code(env, s->pc);
1884            s->pc += 4;
1885            break;
1886        }
1887
1888        /* For correct popl handling with esp.  */
1889        if (base == R_ESP && s->popl_esp_hack) {
1890            disp += s->popl_esp_hack;
1891        }
1892        if (base == R_EBP || base == R_ESP) {
1893            def_seg = R_SS;
1894        }
1895        break;
1896
1897    case MO_16:
1898        if (mod == 0) {
1899            if (rm == 6) {
1900                base = -1;
1901                disp = cpu_lduw_code(env, s->pc);
1902                s->pc += 2;
1903                break;
1904            }
1905        } else if (mod == 1) {
1906            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1907        } else {
1908            disp = (int16_t)cpu_lduw_code(env, s->pc);
1909            s->pc += 2;
1910        }
1911
1912        switch (rm) {
1913        case 0:
1914            base = R_EBX;
1915            index = R_ESI;
1916            break;
1917        case 1:
1918            base = R_EBX;
1919            index = R_EDI;
1920            break;
1921        case 2:
1922            base = R_EBP;
1923            index = R_ESI;
1924            def_seg = R_SS;
1925            break;
1926        case 3:
1927            base = R_EBP;
1928            index = R_EDI;
1929            def_seg = R_SS;
1930            break;
1931        case 4:
1932            base = R_ESI;
1933            break;
1934        case 5:
1935            base = R_EDI;
1936            break;
1937        case 6:
1938            base = R_EBP;
1939            def_seg = R_SS;
1940            break;
1941        default:
1942        case 7:
1943            base = R_EBX;
1944            break;
1945        }
1946        break;
1947
1948    default:
1949        tcg_abort();
1950    }
1951
1952 done:
1953    return (AddressParts){ def_seg, base, index, scale, disp };
1954}
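/* Worked example with a 32-bit address size: modrm = 0x74 has mod = 1
   and rm = 4, so a SIB byte follows; sib = 0x8d decodes as scale = 2,
   index = 1 (ECX) and base = 5 (EBP), and mod = 1 appends an 8-bit
   displacement.  The operand is therefore ss:[ebp + ecx*4 + disp8].  */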
1955
1956/* Compute the address, with a minimum number of TCG ops.  */
1957static TCGv gen_lea_modrm_1(AddressParts a)
1958{
1959    TCGv ea;
1960
1961    TCGV_UNUSED(ea);
1962    if (a.index >= 0) {
1963        if (a.scale == 0) {
1964            ea = cpu_regs[a.index];
1965        } else {
1966            tcg_gen_shli_tl(cpu_A0, cpu_regs[a.index], a.scale);
1967            ea = cpu_A0;
1968        }
1969        if (a.base >= 0) {
1970            tcg_gen_add_tl(cpu_A0, ea, cpu_regs[a.base]);
1971            ea = cpu_A0;
1972        }
1973    } else if (a.base >= 0) {
1974        ea = cpu_regs[a.base];
1975    }
1976    if (TCGV_IS_UNUSED(ea)) {
1977        tcg_gen_movi_tl(cpu_A0, a.disp);
1978        ea = cpu_A0;
1979    } else if (a.disp != 0) {
1980        tcg_gen_addi_tl(cpu_A0, ea, a.disp);
1981        ea = cpu_A0;
1982    }
1983
1984    return ea;
1985}
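/* Note that when the address reduces to a single register with no
   displacement, that register is returned directly and no TCG ops are
   emitted at all; cpu_A0 is only written when some address arithmetic
   is actually required.  */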
1986
1987static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1988{
1989    AddressParts a = gen_lea_modrm_0(env, s, modrm);
1990    TCGv ea = gen_lea_modrm_1(a);
1991    gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
1992}
1993
1994static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
1995{
1996    (void)gen_lea_modrm_0(env, s, modrm);
1997}
1998
1999/* Used for BNDCL, BNDCU, BNDCN.  */
2000static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2001                      TCGCond cond, TCGv_i64 bndv)
2002{
2003    TCGv ea = gen_lea_modrm_1(gen_lea_modrm_0(env, s, modrm));
2004
2005    tcg_gen_extu_tl_i64(cpu_tmp1_i64, ea);
2006    if (!CODE64(s)) {
2007        tcg_gen_ext32u_i64(cpu_tmp1_i64, cpu_tmp1_i64);
2008    }
2009    tcg_gen_setcond_i64(cond, cpu_tmp1_i64, cpu_tmp1_i64, bndv);
2010    tcg_gen_extrl_i64_i32(cpu_tmp2_i32, cpu_tmp1_i64);
2011    gen_helper_bndck(cpu_env, cpu_tmp2_i32);
2012}
2013
2014/* used for LEA and MOV AX, mem */
2015static void gen_add_A0_ds_seg(DisasContext *s)
2016{
2017    gen_lea_v_seg(s, s->aflag, cpu_A0, R_DS, s->override);
2018}
2019
2020/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2021   OR_TMP0 */
2022static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2023                           TCGMemOp ot, int reg, int is_store)
2024{
2025    int mod, rm;
2026
2027    mod = (modrm >> 6) & 3;
2028    rm = (modrm & 7) | REX_B(s);
2029    if (mod == 3) {
2030        if (is_store) {
2031            if (reg != OR_TMP0)
2032                gen_op_mov_v_reg(ot, cpu_T0, reg);
2033            gen_op_mov_reg_v(ot, rm, cpu_T0);
2034        } else {
2035            gen_op_mov_v_reg(ot, cpu_T0, rm);
2036            if (reg != OR_TMP0)
2037                gen_op_mov_reg_v(ot, reg, cpu_T0);
2038        }
2039    } else {
2040        gen_lea_modrm(env, s, modrm);
2041        if (is_store) {
2042            if (reg != OR_TMP0)
2043                gen_op_mov_v_reg(ot, cpu_T0, reg);
2044            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
2045        } else {
2046            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
2047            if (reg != OR_TMP0)
2048                gen_op_mov_reg_v(ot, reg, cpu_T0);
2049        }
2050    }
2051}
2052
2053static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2054{
2055    uint32_t ret;
2056
2057    switch (ot) {
2058    case MO_8:
2059        ret = cpu_ldub_code(env, s->pc);
2060        s->pc++;
2061        break;
2062    case MO_16:
2063        ret = cpu_lduw_code(env, s->pc);
2064        s->pc += 2;
2065        break;
2066    case MO_32:
2067#ifdef TARGET_X86_64
2068    case MO_64:
2069#endif
2070        ret = cpu_ldl_code(env, s->pc);
2071        s->pc += 4;
2072        break;
2073    default:
2074        tcg_abort();
2075    }
2076    return ret;
2077}
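/* MO_64 shares the 32-bit load because x86 immediates are at most 32
   bits wide (sign-extended to 64 bits by the callers that need it);
   the one 64-bit immediate form, MOV reg, imm64, is fetched separately
   by the decoder.  */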
2078
2079static inline int insn_const_size(TCGMemOp ot)
2080{
2081    if (ot <= MO_32) {
2082        return 1 << ot;
2083    } else {
2084        return 4;
2085    }
2086}
2087
2088static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2089{
2090    TranslationBlock *tb;
2091    target_ulong pc;
2092
2093    pc = s->cs_base + eip;
2094    tb = s->tb;
2095    /* NOTE: we handle the case where the TB spans two pages here */
2096    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
2097        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
2098        /* jump to same page: we can use a direct jump */
2099        tcg_gen_goto_tb(tb_num);
2100        gen_jmp_im(eip);
2101        tcg_gen_exit_tb((uintptr_t)tb + tb_num);
2102    } else {
2103        /* jump to another page: currently not optimized */
2104        gen_jmp_im(eip);
2105        gen_eob(s);
2106    }
2107}
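/* tcg_gen_goto_tb()/tcg_gen_exit_tb() implement TB chaining: the low
   bits of the value returned through exit_tb identify which of the two
   jump slots of this TB to patch, so once the destination is
   translated the blocks are linked directly.  Chaining is only safe
   when the destination lies within the pages already checked for this
   TB; otherwise the TB ends and the jump goes through the main loop,
   so that protection changes on the target page are honoured.  */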
2108
2109static inline void gen_jcc(DisasContext *s, int b,
2110                           target_ulong val, target_ulong next_eip)
2111{
2112    TCGLabel *l1, *l2;
2113
2114    if (s->jmp_opt) {
2115        l1 = gen_new_label();
2116        gen_jcc1(s, b, l1);
2117
2118        gen_goto_tb(s, 0, next_eip);
2119
2120        gen_set_label(l1);
2121        gen_goto_tb(s, 1, val);
2122        s->is_jmp = DISAS_TB_JUMP;
2123    } else {
2124        l1 = gen_new_label();
2125        l2 = gen_new_label();
2126        gen_jcc1(s, b, l1);
2127
2128        gen_jmp_im(next_eip);
2129        tcg_gen_br(l2);
2130
2131        gen_set_label(l1);
2132        gen_jmp_im(val);
2133        gen_set_label(l2);
2134        gen_eob(s);
2135    }
2136}
2137
2138static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2139                        int modrm, int reg)
2140{
2141    CCPrepare cc;
2142
2143    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2144
2145    cc = gen_prepare_cc(s, b, cpu_T1);
2146    if (cc.mask != -1) {
2147        TCGv t0 = tcg_temp_new();
2148        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2149        cc.reg = t0;
2150    }
2151    if (!cc.use_reg2) {
2152        cc.reg2 = tcg_const_tl(cc.imm);
2153    }
2154
2155    tcg_gen_movcond_tl(cc.cond, cpu_T0, cc.reg, cc.reg2,
2156                       cpu_T0, cpu_regs[reg]);
2157    gen_op_mov_reg_v(ot, reg, cpu_T0);
2158
2159    if (cc.mask != -1) {
2160        tcg_temp_free(cc.reg);
2161    }
2162    if (!cc.use_reg2) {
2163        tcg_temp_free(cc.reg2);
2164    }
2165}
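/* As on real hardware, the source operand of CMOVcc is always read
   (gen_ldst_modrm above) even when the condition is false; only the
   register write-back is conditional, done branchlessly with
   movcond.  */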
2166
2167static inline void gen_op_movl_T0_seg(int seg_reg)
2168{
2169    tcg_gen_ld32u_tl(cpu_T0, cpu_env,
2170                     offsetof(CPUX86State,segs[seg_reg].selector));
2171}
2172
2173static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2174{
2175    tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
2176    tcg_gen_st32_tl(cpu_T0, cpu_env,
2177                    offsetof(CPUX86State,segs[seg_reg].selector));
2178    tcg_gen_shli_tl(cpu_seg_base[seg_reg], cpu_T0, 4);
2179}
2180
2181/* move T0 to seg_reg and compute if the CPU state may change. Never
2182   call this function with seg_reg == R_CS */
2183static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2184{
2185    if (s->pe && !s->vm86) {
2186        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
2187        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2188        /* abort translation because the addseg value may change or
2189           because ss32 may change. For R_SS, translation must always
2190           stop as a special handling must be done to disable hardware
2191           interrupts for the next instruction */
2192        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2193            s->is_jmp = DISAS_TB_JUMP;
2194    } else {
2195        gen_op_movl_seg_T0_vm(seg_reg);
2196        if (seg_reg == R_SS)
2197            s->is_jmp = DISAS_TB_JUMP;
2198    }
2199}
2200
2201static inline int svm_is_rep(int prefixes)
2202{
2203    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2204}
2205
2206static inline void
2207gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2208                              uint32_t type, uint64_t param)
2209{
2210    /* no SVM activated; fast case */
2211    if (likely(!(s->flags & HF_SVMI_MASK)))
2212        return;
2213    gen_update_cc_op(s);
2214    gen_jmp_im(pc_start - s->cs_base);
2215    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2216                                         tcg_const_i64(param));
2217}
2218
2219static inline void
2220gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2221{
2222    gen_svm_check_intercept_param(s, pc_start, type, 0);
2223}
2224
2225static inline void gen_stack_update(DisasContext *s, int addend)
2226{
2227    gen_op_add_reg_im(mo_stacksize(s), R_ESP, addend);
2228}
2229
2230/* Generate a push. It depends on ss32, addseg and dflag.  */
2231static void gen_push_v(DisasContext *s, TCGv val)
2232{
2233    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2234    TCGMemOp a_ot = mo_stacksize(s);
2235    int size = 1 << d_ot;
2236    TCGv new_esp = cpu_A0;
2237
2238    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2239
2240    if (!CODE64(s)) {
2241        if (s->addseg) {
2242            new_esp = cpu_tmp4;
2243            tcg_gen_mov_tl(new_esp, cpu_A0);
2244        }
2245        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2246    }
2247
2248    gen_op_st_v(s, d_ot, val, cpu_A0);
2249    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2250}
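/* The store goes through the decremented address first and ESP is only
   updated afterwards, so a faulting push leaves ESP unchanged, as
   required for precise exceptions.  The copy into cpu_tmp4 preserves
   the new ESP value when addseg makes gen_lea_v_seg() overwrite cpu_A0
   with the segment-adjusted address.  */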
2251
2252/* two step pop is necessary for precise exceptions */
2253static TCGMemOp gen_pop_T0(DisasContext *s)
2254{
2255    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2256
2257    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2258    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2259
2260    return d_ot;
2261}
2262
2263static inline void gen_pop_update(DisasContext *s, TCGMemOp ot)
2264{
2265    gen_stack_update(s, 1 << ot);
2266}
2267
2268static inline void gen_stack_A0(DisasContext *s)
2269{
2270    gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2271}
2272
2273static void gen_pusha(DisasContext *s)
2274{
2275    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2276    TCGMemOp d_ot = s->dflag;
2277    int size = 1 << d_ot;
2278    int i;
2279
2280    for (i = 0; i < 8; i++) {
2281        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], (i - 8) * size);
2282        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2283        gen_op_st_v(s, d_ot, cpu_regs[7 - i], cpu_A0);
2284    }
2285
2286    gen_stack_update(s, -8 * size);
2287}
2288
2289static void gen_popa(DisasContext *s)
2290{
2291    TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16;
2292    TCGMemOp d_ot = s->dflag;
2293    int size = 1 << d_ot;
2294    int i;
2295
2296    for (i = 0; i < 8; i++) {
2297        /* ESP is not reloaded */
2298        if (7 - i == R_ESP) {
2299            continue;
2300        }
2301        tcg_gen_addi_tl(cpu_A0, cpu_regs[R_ESP], i * size);
2302        gen_lea_v_seg(s, s_ot, cpu_A0, R_SS, -1);
2303        gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2304        gen_op_mov_reg_v(d_ot, 7 - i, cpu_T0);
2305    }
2306
2307    gen_stack_update(s, 8 * size);
2308}
2309
2310static void gen_enter(DisasContext *s, int esp_addend, int level)
2311{
2312    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2313    TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2314    int size = 1 << d_ot;
2315
2316    /* Push BP; compute FrameTemp into T1.  */
2317    tcg_gen_subi_tl(cpu_T1, cpu_regs[R_ESP], size);
2318    gen_lea_v_seg(s, a_ot, cpu_T1, R_SS, -1);
2319    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], cpu_A0);
2320
2321    level &= 31;
2322    if (level != 0) {
2323        int i;
2324
2325        /* Copy level-1 pointers from the previous frame.  */
2326        for (i = 1; i < level; ++i) {
2327            tcg_gen_subi_tl(cpu_A0, cpu_regs[R_EBP], size * i);
2328            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2329            gen_op_ld_v(s, d_ot, cpu_tmp0, cpu_A0);
2330
2331            tcg_gen_subi_tl(cpu_A0, cpu_T1, size * i);
2332            gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2333            gen_op_st_v(s, d_ot, cpu_tmp0, cpu_A0);
2334        }
2335
2336        /* Push the current FrameTemp as the last level.  */
2337        tcg_gen_subi_tl(cpu_A0, cpu_T1, size * level);
2338        gen_lea_v_seg(s, a_ot, cpu_A0, R_SS, -1);
2339        gen_op_st_v(s, d_ot, cpu_T1, cpu_A0);
2340    }
2341
2342    /* Copy the FrameTemp value to EBP.  */
2343    gen_op_mov_reg_v(a_ot, R_EBP, cpu_T1);
2344
2345    /* Compute the final value of ESP.  */
2346    tcg_gen_subi_tl(cpu_T1, cpu_T1, esp_addend + size * level);
2347    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2348}
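/* Example: "enter 16, 0" is push %rbp; mov %rsp, %rbp (via the
   FrameTemp kept in T1); sub $16, %rsp.  With level > 0 the loop above
   additionally copies level-1 saved frame pointers from the old frame
   and then pushes FrameTemp itself, matching the architected nesting
   behaviour.  */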
2349
2350static void gen_leave(DisasContext *s)
2351{
2352    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2353    TCGMemOp a_ot = mo_stacksize(s);
2354
2355    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2356    gen_op_ld_v(s, d_ot, cpu_T0, cpu_A0);
2357
2358    tcg_gen_addi_tl(cpu_T1, cpu_regs[R_EBP], 1 << d_ot);
2359
2360    gen_op_mov_reg_v(d_ot, R_EBP, cpu_T0);
2361    gen_op_mov_reg_v(a_ot, R_ESP, cpu_T1);
2362}
2363
2364static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2365{
2366    gen_update_cc_op(s);
2367    gen_jmp_im(cur_eip);
2368    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2369    s->is_jmp = DISAS_TB_JUMP;
2370}
2371
2372/* Generate #UD for the current instruction.  The assumption here is that
2373   the instruction is known, but it isn't allowed in the current cpu mode.  */
2374static void gen_illegal_opcode(DisasContext *s)
2375{
2376    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
2377}
2378
2379/* Similarly, except that the assumption here is that we don't decode
2380   the instruction at all -- either a missing opcode, an unimplemented
2381   feature, or just a bogus instruction stream.  */
2382static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2383{
2384    gen_illegal_opcode(s);
2385
2386    if (qemu_loglevel_mask(LOG_UNIMP)) {
2387        target_ulong pc = s->pc_start, end = s->pc;
2388        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2389        for (; pc < end; ++pc) {
2390            qemu_log(" %02x", cpu_ldub_code(env, pc));
2391        }
2392        qemu_log("\n");
2393    }
2394}
2395
2396/* an interrupt is different from an exception because of the
2397   privilege checks */
2398static void gen_interrupt(DisasContext *s, int intno,
2399                          target_ulong cur_eip, target_ulong next_eip)
2400{
2401    gen_update_cc_op(s);
2402    gen_jmp_im(cur_eip);
2403    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2404                               tcg_const_i32(next_eip - cur_eip));
2405    s->is_jmp = DISAS_TB_JUMP;
2406}
2407
2408static void gen_debug(DisasContext *s, target_ulong cur_eip)
2409{
2410    gen_update_cc_op(s);
2411    gen_jmp_im(cur_eip);
2412    gen_helper_debug(cpu_env);
2413    s->is_jmp = DISAS_TB_JUMP;
2414}
2415
2416static void gen_set_hflag(DisasContext *s, uint32_t mask)
2417{
2418    if ((s->flags & mask) == 0) {
2419        TCGv_i32 t = tcg_temp_new_i32();
2420        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2421        tcg_gen_ori_i32(t, t, mask);
2422        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2423        tcg_temp_free_i32(t);
2424        s->flags |= mask;
2425    }
2426}
2427
2428static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2429{
2430    if (s->flags & mask) {
2431        TCGv_i32 t = tcg_temp_new_i32();
2432        tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2433        tcg_gen_andi_i32(t, t, ~mask);
2434        tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2435        tcg_temp_free_i32(t);
2436        s->flags &= ~mask;
2437    }
2438}
2439
2440/* Clear BND registers during legacy branches.  */
2441static void gen_bnd_jmp(DisasContext *s)
2442{
2443    /* Clear the registers only if BND prefix is missing, MPX is enabled,
2444       and if the BNDREGs are known to be in use (non-zero) already.
2445       The helper itself will check BNDPRESERVE at runtime.  */
2446    if ((s->prefix & PREFIX_REPNZ) == 0
2447        && (s->flags & HF_MPX_EN_MASK) != 0
2448        && (s->flags & HF_MPX_IU_MASK) != 0) {
2449        gen_helper_bnd_jmp(cpu_env);
2450    }
2451}
2452
2453/* Generate an end of block. Trace exception is also generated if needed.
2454   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2455static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2456{
2457    gen_update_cc_op(s);
2458
2459    /* If several instructions disable interrupts, only the first does it.  */
2460    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2461        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2462    } else {
2463        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2464    }
2465
2466    if (s->tb->flags & HF_RF_MASK) {
2467        gen_helper_reset_rf(cpu_env);
2468    }
2469    if (s->singlestep_enabled) {
2470        gen_helper_debug(cpu_env);
2471    } else if (s->tf) {
2472        gen_helper_single_step(cpu_env);
2473    } else {
2474        tcg_gen_exit_tb(0);
2475    }
2476    s->is_jmp = DISAS_TB_JUMP;
2477}
2478
2479/* End of block, resetting the inhibit irq flag.  */
2480static void gen_eob(DisasContext *s)
2481{
2482    gen_eob_inhibit_irq(s, false);
2483}
2484
2485/* Generate a jump to eip.  No segment change may happen beforehand,
2486   since a direct jump to the next translated block may occur.  */
2487static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2488{
2489    gen_update_cc_op(s);
2490    set_cc_op(s, CC_OP_DYNAMIC);
2491    if (s->jmp_opt) {
2492        gen_goto_tb(s, tb_num, eip);
2493        s->is_jmp = DISAS_TB_JUMP;
2494    } else {
2495        gen_jmp_im(eip);
2496        gen_eob(s);
2497    }
2498}
2499
2500static void gen_jmp(DisasContext *s, target_ulong eip)
2501{
2502    gen_jmp_tb(s, eip, 0);
2503}
2504
2505static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2506{
2507    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2508    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2509}
2510
2511static inline void gen_stq_env_A0(DisasContext *s, int offset)
2512{
2513    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2514    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2515}
2516
2517static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2518{
2519    int mem_index = s->mem_index;
2520    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2521    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2522    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2523    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2524    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2525}
2526
2527static inline void gen_sto_env_A0(DisasContext *s, int offset)
2528{
2529    int mem_index = s->mem_index;
2530    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2531    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2532    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2533    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2534    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2535}
2536
2537static inline void gen_op_movo(int d_offset, int s_offset)
2538{
2539    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2540    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2541    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2542    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2543}
2544
2545static inline void gen_op_movq(int d_offset, int s_offset)
2546{
2547    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2548    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2549}
2550
2551static inline void gen_op_movl(int d_offset, int s_offset)
2552{
2553    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2554    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2555}
2556
2557static inline void gen_op_movq_env_0(int d_offset)
2558{
2559    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2560    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2561}
2562
2563typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2564typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2565typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2566typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2567typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2568typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2569                               TCGv_i32 val);
2570typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2571typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2572                               TCGv val);
2573
2574#define SSE_SPECIAL ((void *)1)
2575#define SSE_DUMMY ((void *)2)
2576
2577#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2578#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2579                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2580
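/* Each row of sse_op_table1 is indexed by the second opcode byte; the
   four columns correspond to the mandatory prefix in effect: none,
   0x66 (PREFIX_DATA), 0xf3 (PREFIX_REPZ) and 0xf2 (PREFIX_REPNZ),
   matching the b1 computation at the top of gen_sse().  */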
2581static const SSEFunc_0_epp sse_op_table1[256][4] = {
2582    /* 3DNow! extensions */
2583    [0x0e] = { SSE_DUMMY }, /* femms */
2584    [0x0f] = { SSE_DUMMY }, /* pf... */
2585    /* pure SSE operations */
2586    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2587    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2588    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2589    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2590    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2591    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2592    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2593    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2594
2595    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2596    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2597    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2598    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2599    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttss2si, cvttsd2si */
2600    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtss2si, cvtsd2si */
2601    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2602    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2603    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2604    [0x51] = SSE_FOP(sqrt),
2605    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2606    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2607    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2608    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2609    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2610    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2611    [0x58] = SSE_FOP(add),
2612    [0x59] = SSE_FOP(mul),
2613    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2614               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2615    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2616    [0x5c] = SSE_FOP(sub),
2617    [0x5d] = SSE_FOP(min),
2618    [0x5e] = SSE_FOP(div),
2619    [0x5f] = SSE_FOP(max),
2620
2621    [0xc2] = SSE_FOP(cmpeq),
2622    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2623               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2624
2625    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2626    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2627    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2628
2629    /* MMX ops and their SSE extensions */
2630    [0x60] = MMX_OP2(punpcklbw),
2631    [0x61] = MMX_OP2(punpcklwd),
2632    [0x62] = MMX_OP2(punpckldq),
2633    [0x63] = MMX_OP2(packsswb),
2634    [0x64] = MMX_OP2(pcmpgtb),
2635    [0x65] = MMX_OP2(pcmpgtw),
2636    [0x66] = MMX_OP2(pcmpgtl),
2637    [0x67] = MMX_OP2(packuswb),
2638    [0x68] = MMX_OP2(punpckhbw),
2639    [0x69] = MMX_OP2(punpckhwd),
2640    [0x6a] = MMX_OP2(punpckhdq),
2641    [0x6b] = MMX_OP2(packssdw),
2642    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2643    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2644    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2645    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2646    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2647               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2648               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2649               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2650    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2651    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2652    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2653    [0x74] = MMX_OP2(pcmpeqb),
2654    [0x75] = MMX_OP2(pcmpeqw),
2655    [0x76] = MMX_OP2(pcmpeql),
2656    [0x77] = { SSE_DUMMY }, /* emms */
2657    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2658    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2659    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2660    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2661    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2662    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2663    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2664    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2665    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2666    [0xd1] = MMX_OP2(psrlw),
2667    [0xd2] = MMX_OP2(psrld),
2668    [0xd3] = MMX_OP2(psrlq),
2669    [0xd4] = MMX_OP2(paddq),
2670    [0xd5] = MMX_OP2(pmullw),
2671    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2672    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2673    [0xd8] = MMX_OP2(psubusb),
2674    [0xd9] = MMX_OP2(psubusw),
2675    [0xda] = MMX_OP2(pminub),
2676    [0xdb] = MMX_OP2(pand),
2677    [0xdc] = MMX_OP2(paddusb),
2678    [0xdd] = MMX_OP2(paddusw),
2679    [0xde] = MMX_OP2(pmaxub),
2680    [0xdf] = MMX_OP2(pandn),
2681    [0xe0] = MMX_OP2(pavgb),
2682    [0xe1] = MMX_OP2(psraw),
2683    [0xe2] = MMX_OP2(psrad),
2684    [0xe3] = MMX_OP2(pavgw),
2685    [0xe4] = MMX_OP2(pmulhuw),
2686    [0xe5] = MMX_OP2(pmulhw),
2687    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2688    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2689    [0xe8] = MMX_OP2(psubsb),
2690    [0xe9] = MMX_OP2(psubsw),
2691    [0xea] = MMX_OP2(pminsw),
2692    [0xeb] = MMX_OP2(por),
2693    [0xec] = MMX_OP2(paddsb),
2694    [0xed] = MMX_OP2(paddsw),
2695    [0xee] = MMX_OP2(pmaxsw),
2696    [0xef] = MMX_OP2(pxor),
2697    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2698    [0xf1] = MMX_OP2(psllw),
2699    [0xf2] = MMX_OP2(pslld),
2700    [0xf3] = MMX_OP2(psllq),
2701    [0xf4] = MMX_OP2(pmuludq),
2702    [0xf5] = MMX_OP2(pmaddwd),
2703    [0xf6] = MMX_OP2(psadbw),
2704    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2705               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2706    [0xf8] = MMX_OP2(psubb),
2707    [0xf9] = MMX_OP2(psubw),
2708    [0xfa] = MMX_OP2(psubl),
2709    [0xfb] = MMX_OP2(psubq),
2710    [0xfc] = MMX_OP2(paddb),
2711    [0xfd] = MMX_OP2(paddw),
2712    [0xfe] = MMX_OP2(paddl),
2713};
2714
2715static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2716    [0 + 2] = MMX_OP2(psrlw),
2717    [0 + 4] = MMX_OP2(psraw),
2718    [0 + 6] = MMX_OP2(psllw),
2719    [8 + 2] = MMX_OP2(psrld),
2720    [8 + 4] = MMX_OP2(psrad),
2721    [8 + 6] = MMX_OP2(pslld),
2722    [16 + 2] = MMX_OP2(psrlq),
2723    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2724    [16 + 6] = MMX_OP2(psllq),
2725    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2726};
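/* sse_op_table2 covers the 0x71/0x72/0x73 shift-by-immediate groups:
   the row index is (group * 8) + the ModRM reg field, i.e. 0x71 maps
   to rows 0..7 (word), 0x72 to rows 8..15 (dword) and 0x73 to rows
   16..23 (qword); the column again selects the MMX or XMM variant.  */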
2727
2728static const SSEFunc_0_epi sse_op_table3ai[] = {
2729    gen_helper_cvtsi2ss,
2730    gen_helper_cvtsi2sd
2731};
2732
2733#ifdef TARGET_X86_64
2734static const SSEFunc_0_epl sse_op_table3aq[] = {
2735    gen_helper_cvtsq2ss,
2736    gen_helper_cvtsq2sd
2737};
2738#endif
2739
2740static const SSEFunc_i_ep sse_op_table3bi[] = {
2741    gen_helper_cvttss2si,
2742    gen_helper_cvtss2si,
2743    gen_helper_cvttsd2si,
2744    gen_helper_cvtsd2si
2745};
2746
2747#ifdef TARGET_X86_64
2748static const SSEFunc_l_ep sse_op_table3bq[] = {
2749    gen_helper_cvttss2sq,
2750    gen_helper_cvtss2sq,
2751    gen_helper_cvttsd2sq,
2752    gen_helper_cvtsd2sq
2753};
2754#endif
2755
2756static const SSEFunc_0_epp sse_op_table4[8][4] = {
2757    SSE_FOP(cmpeq),
2758    SSE_FOP(cmplt),
2759    SSE_FOP(cmple),
2760    SSE_FOP(cmpunord),
2761    SSE_FOP(cmpneq),
2762    SSE_FOP(cmpnlt),
2763    SSE_FOP(cmpnle),
2764    SSE_FOP(cmpord),
2765};
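/* sse_op_table4 implements CMPPS/CMPPD/CMPSS/CMPSD: the low three bits
   of the immediate select one of the eight SSE comparison
   predicates.  */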
2766
2767static const SSEFunc_0_epp sse_op_table5[256] = {
2768    [0x0c] = gen_helper_pi2fw,
2769    [0x0d] = gen_helper_pi2fd,
2770    [0x1c] = gen_helper_pf2iw,
2771    [0x1d] = gen_helper_pf2id,
2772    [0x8a] = gen_helper_pfnacc,
2773    [0x8e] = gen_helper_pfpnacc,
2774    [0x90] = gen_helper_pfcmpge,
2775    [0x94] = gen_helper_pfmin,
2776    [0x96] = gen_helper_pfrcp,
2777    [0x97] = gen_helper_pfrsqrt,
2778    [0x9a] = gen_helper_pfsub,
2779    [0x9e] = gen_helper_pfadd,
2780    [0xa0] = gen_helper_pfcmpgt,
2781    [0xa4] = gen_helper_pfmax,
2782    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2783    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2784    [0xaa] = gen_helper_pfsubr,
2785    [0xae] = gen_helper_pfacc,
2786    [0xb0] = gen_helper_pfcmpeq,
2787    [0xb4] = gen_helper_pfmul,
2788    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2789    [0xb7] = gen_helper_pmulhrw_mmx,
2790    [0xbb] = gen_helper_pswapd,
2791    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2792};
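/* 3DNow! instructions are encoded as 0F 0F /r imm8, and sse_op_table5
   is indexed by that trailing opcode byte.  */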
2793
2794struct SSEOpHelper_epp {
2795    SSEFunc_0_epp op[2];
2796    uint32_t ext_mask;
2797};
2798
2799struct SSEOpHelper_eppi {
2800    SSEFunc_0_eppi op[2];
2801    uint32_t ext_mask;
2802};
2803
2804#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2805#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2806#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2807#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2808#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2809        CPUID_EXT_PCLMULQDQ }
2810#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2811
2812static const struct SSEOpHelper_epp sse_op_table6[256] = {
2813    [0x00] = SSSE3_OP(pshufb),
2814    [0x01] = SSSE3_OP(phaddw),
2815    [0x02] = SSSE3_OP(phaddd),
2816    [0x03] = SSSE3_OP(phaddsw),
2817    [0x04] = SSSE3_OP(pmaddubsw),
2818    [0x05] = SSSE3_OP(phsubw),
2819    [0x06] = SSSE3_OP(phsubd),
2820    [0x07] = SSSE3_OP(phsubsw),
2821    [0x08] = SSSE3_OP(psignb),
2822    [0x09] = SSSE3_OP(psignw),
2823    [0x0a] = SSSE3_OP(psignd),
2824    [0x0b] = SSSE3_OP(pmulhrsw),
2825    [0x10] = SSE41_OP(pblendvb),
2826    [0x14] = SSE41_OP(blendvps),
2827    [0x15] = SSE41_OP(blendvpd),
2828    [0x17] = SSE41_OP(ptest),
2829    [0x1c] = SSSE3_OP(pabsb),
2830    [0x1d] = SSSE3_OP(pabsw),
2831    [0x1e] = SSSE3_OP(pabsd),
2832    [0x20] = SSE41_OP(pmovsxbw),
2833    [0x21] = SSE41_OP(pmovsxbd),
2834    [0x22] = SSE41_OP(pmovsxbq),
2835    [0x23] = SSE41_OP(pmovsxwd),
2836    [0x24] = SSE41_OP(pmovsxwq),
2837    [0x25] = SSE41_OP(pmovsxdq),
2838    [0x28] = SSE41_OP(pmuldq),
2839    [0x29] = SSE41_OP(pcmpeqq),
2840    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2841    [0x2b] = SSE41_OP(packusdw),
2842    [0x30] = SSE41_OP(pmovzxbw),
2843    [0x31] = SSE41_OP(pmovzxbd),
2844    [0x32] = SSE41_OP(pmovzxbq),
2845    [0x33] = SSE41_OP(pmovzxwd),
2846    [0x34] = SSE41_OP(pmovzxwq),
2847    [0x35] = SSE41_OP(pmovzxdq),
2848    [0x37] = SSE42_OP(pcmpgtq),
2849    [0x38] = SSE41_OP(pminsb),
2850    [0x39] = SSE41_OP(pminsd),
2851    [0x3a] = SSE41_OP(pminuw),
2852    [0x3b] = SSE41_OP(pminud),
2853    [0x3c] = SSE41_OP(pmaxsb),
2854    [0x3d] = SSE41_OP(pmaxsd),
2855    [0x3e] = SSE41_OP(pmaxuw),
2856    [0x3f] = SSE41_OP(pmaxud),
2857    [0x40] = SSE41_OP(pmulld),
2858    [0x41] = SSE41_OP(phminposuw),
2859    [0xdb] = AESNI_OP(aesimc),
2860    [0xdc] = AESNI_OP(aesenc),
2861    [0xdd] = AESNI_OP(aesenclast),
2862    [0xde] = AESNI_OP(aesdec),
2863    [0xdf] = AESNI_OP(aesdeclast),
2864};
2865
2866static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2867    [0x08] = SSE41_OP(roundps),
2868    [0x09] = SSE41_OP(roundpd),
2869    [0x0a] = SSE41_OP(roundss),
2870    [0x0b] = SSE41_OP(roundsd),
2871    [0x0c] = SSE41_OP(blendps),
2872    [0x0d] = SSE41_OP(blendpd),
2873    [0x0e] = SSE41_OP(pblendw),
2874    [0x0f] = SSSE3_OP(palignr),
2875    [0x14] = SSE41_SPECIAL, /* pextrb */
2876    [0x15] = SSE41_SPECIAL, /* pextrw */
2877    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2878    [0x17] = SSE41_SPECIAL, /* extractps */
2879    [0x20] = SSE41_SPECIAL, /* pinsrb */
2880    [0x21] = SSE41_SPECIAL, /* insertps */
2881    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2882    [0x40] = SSE41_OP(dpps),
2883    [0x41] = SSE41_OP(dppd),
2884    [0x42] = SSE41_OP(mpsadbw),
2885    [0x44] = PCLMULQDQ_OP(pclmulqdq),
2886    [0x60] = SSE42_OP(pcmpestrm),
2887    [0x61] = SSE42_OP(pcmpestri),
2888    [0x62] = SSE42_OP(pcmpistrm),
2889    [0x63] = SSE42_OP(pcmpistri),
2890    [0xdf] = AESNI_OP(aeskeygenassist),
2891};
2892
2893static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2894                    target_ulong pc_start, int rex_r)
2895{
2896    int b1, op1_offset, op2_offset, is_xmm, val;
2897    int modrm, mod, rm, reg;
2898    SSEFunc_0_epp sse_fn_epp;
2899    SSEFunc_0_eppi sse_fn_eppi;
2900    SSEFunc_0_ppi sse_fn_ppi;
2901    SSEFunc_0_eppt sse_fn_eppt;
2902    TCGMemOp ot;
2903
2904    b &= 0xff;
2905    if (s->prefix & PREFIX_DATA)
2906        b1 = 1;
2907    else if (s->prefix & PREFIX_REPZ)
2908        b1 = 2;
2909    else if (s->prefix & PREFIX_REPNZ)
2910        b1 = 3;
2911    else
2912        b1 = 0;
2913    sse_fn_epp = sse_op_table1[b][b1];
2914    if (!sse_fn_epp) {
2915        goto unknown_op;
2916    }
2917    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
2918        is_xmm = 1;
2919    } else {
2920        if (b1 == 0) {
2921            /* MMX case */
2922            is_xmm = 0;
2923        } else {
2924            is_xmm = 1;
2925        }
2926    }
2927    /* simple MMX/SSE operation */
2928    if (s->flags & HF_TS_MASK) {
2929        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
2930        return;
2931    }
2932    if (s->flags & HF_EM_MASK) {
2933    illegal_op:
2934        gen_illegal_opcode(s);
2935        return;
2936    }
2937    if (is_xmm
2938        && !(s->flags & HF_OSFXSR_MASK)
2939        && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) {
2940        goto unknown_op;
2941    }
2942    if (b == 0x0e) {
2943        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
2944            /* If we were fully decoding this we might use illegal_op.  */
2945            goto unknown_op;
2946        }
2947        /* femms */
2948        gen_helper_emms(cpu_env);
2949        return;
2950    }
2951    if (b == 0x77) {
2952        /* emms */
2953        gen_helper_emms(cpu_env);
2954        return;
2955    }
2956    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
2957       the static cpu state) */
2958    if (!is_xmm) {
2959        gen_helper_enter_mmx(cpu_env);
2960    }
2961
2962    modrm = cpu_ldub_code(env, s->pc++);
2963    reg = ((modrm >> 3) & 7);
2964    if (is_xmm)
2965        reg |= rex_r;
2966    mod = (modrm >> 6) & 3;
2967    if (sse_fn_epp == SSE_SPECIAL) {
2968        b |= (b1 << 8);
2969        switch(b) {
2970        case 0x0e7: /* movntq */
2971            if (mod == 3) {
2972                goto illegal_op;
2973            }
2974            gen_lea_modrm(env, s, modrm);
2975            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
2976            break;
2977        case 0x1e7: /* movntdq */
2978        case 0x02b: /* movntps */
2979        case 0x12b: /* movntpd */
2980            if (mod == 3)
2981                goto illegal_op;
2982            gen_lea_modrm(env, s, modrm);
2983            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2984            break;
2985        case 0x3f0: /* lddqu */
2986            if (mod == 3)
2987                goto illegal_op;
2988            gen_lea_modrm(env, s, modrm);
2989            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
2990            break;
2991        case 0x22b: /* movntss */
2992        case 0x32b: /* movntsd */
2993            if (mod == 3)
2994                goto illegal_op;
2995            gen_lea_modrm(env, s, modrm);
2996            if (b1 & 1) {
2997                gen_stq_env_A0(s, offsetof(CPUX86State,
2998                                           xmm_regs[reg].ZMM_Q(0)));
2999            } else {
3000                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
3001                    xmm_regs[reg].ZMM_L(0)));
3002                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3003            }
3004            break;
3005        case 0x6e: /* movd mm, ea */
3006#ifdef TARGET_X86_64
3007            if (s->dflag == MO_64) {
3008                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3009                tcg_gen_st_tl(cpu_T0, cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3010            } else
3011#endif
3012            {
3013                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3014                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3015                                 offsetof(CPUX86State,fpregs[reg].mmx));
3016                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3017                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3018            }
3019            break;
3020        case 0x16e: /* movd xmm, ea */
3021#ifdef TARGET_X86_64
3022            if (s->dflag == MO_64) {
3023                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3024                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3025                                 offsetof(CPUX86State,xmm_regs[reg]));
3026                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T0);
3027            } else
3028#endif
3029            {
3030                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3031                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3032                                 offsetof(CPUX86State,xmm_regs[reg]));
3033                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3034                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3035            }
3036            break;
3037        case 0x6f: /* movq mm, ea */
3038            if (mod != 3) {
3039                gen_lea_modrm(env, s, modrm);
3040                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3041            } else {
3042                rm = (modrm & 7);
3043                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3044                               offsetof(CPUX86State,fpregs[rm].mmx));
3045                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3046                               offsetof(CPUX86State,fpregs[reg].mmx));
3047            }
3048            break;
3049        case 0x010: /* movups */
3050        case 0x110: /* movupd */
3051        case 0x028: /* movaps */
3052        case 0x128: /* movapd */
3053        case 0x16f: /* movdqa xmm, ea */
3054        case 0x26f: /* movdqu xmm, ea */
3055            if (mod != 3) {
3056                gen_lea_modrm(env, s, modrm);
3057                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3058            } else {
3059                rm = (modrm & 7) | REX_B(s);
3060                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3061                            offsetof(CPUX86State,xmm_regs[rm]));
3062            }
3063            break;
3064        case 0x210: /* movss xmm, ea */
3065            if (mod != 3) {
3066                gen_lea_modrm(env, s, modrm);
3067                gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3068                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3069                tcg_gen_movi_tl(cpu_T0, 0);
3070                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3071                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3072                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3073            } else {
3074                rm = (modrm & 7) | REX_B(s);
3075                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3076                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3077            }
3078            break;
3079        case 0x310: /* movsd xmm, ea */
3080            if (mod != 3) {
3081                gen_lea_modrm(env, s, modrm);
3082                gen_ldq_env_A0(s, offsetof(CPUX86State,
3083                                           xmm_regs[reg].ZMM_Q(0)));
3084                tcg_gen_movi_tl(cpu_T0, 0);
3085                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3086                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3087            } else {
3088                rm = (modrm & 7) | REX_B(s);
3089                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3090                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3091            }
3092            break;
3093        case 0x012: /* movlps */
3094        case 0x112: /* movlpd */
3095            if (mod != 3) {
3096                gen_lea_modrm(env, s, modrm);
3097                gen_ldq_env_A0(s, offsetof(CPUX86State,
3098                                           xmm_regs[reg].ZMM_Q(0)));
3099            } else {
3100                /* movhlps */
3101                rm = (modrm & 7) | REX_B(s);
3102                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3103                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3104            }
3105            break;
3106        case 0x212: /* movsldup */
3107            if (mod != 3) {
3108                gen_lea_modrm(env, s, modrm);
3109                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3110            } else {
3111                rm = (modrm & 7) | REX_B(s);
3112                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3113                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3114                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3115                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3116            }
3117            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3118                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3119            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3120                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3121            break;
3122        case 0x312: /* movddup */
3123            if (mod != 3) {
3124                gen_lea_modrm(env, s, modrm);
3125                gen_ldq_env_A0(s, offsetof(CPUX86State,
3126                                           xmm_regs[reg].ZMM_Q(0)));
3127            } else {
3128                rm = (modrm & 7) | REX_B(s);
3129                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3130                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3131            }
3132            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3133                        offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3134            break;
3135        case 0x016: /* movhps */
3136        case 0x116: /* movhpd */
3137            if (mod != 3) {
3138                gen_lea_modrm(env, s, modrm);
3139                gen_ldq_env_A0(s, offsetof(CPUX86State,
3140                                           xmm_regs[reg].ZMM_Q(1)));
3141            } else {
3142                /* movlhps */
3143                rm = (modrm & 7) | REX_B(s);
3144                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)),
3145                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3146            }
3147            break;
3148        case 0x216: /* movshdup */
3149            if (mod != 3) {
3150                gen_lea_modrm(env, s, modrm);
3151                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3152            } else {
3153                rm = (modrm & 7) | REX_B(s);
3154                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)),
3155                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3156                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)),
3157                            offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3158            }
3159            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)),
3160                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3161            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)),
3162                        offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3163            break;
3164        case 0x178:
3165        case 0x378:
3166            {
3167                int bit_index, field_length;
3168
3169                if (b1 == 1 && reg != 0)
3170                    goto illegal_op;
3171                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3172                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3173                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3174                    offsetof(CPUX86State,xmm_regs[reg]));
3175                if (b1 == 1)
3176                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3177                                       tcg_const_i32(bit_index),
3178                                       tcg_const_i32(field_length));
3179                else
3180                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3181                                         tcg_const_i32(bit_index),
3182                                         tcg_const_i32(field_length));
3183            }
3184            break;
3185        case 0x7e: /* movd ea, mm */
3186#ifdef TARGET_X86_64
3187            if (s->dflag == MO_64) {
3188                tcg_gen_ld_i64(cpu_T0, cpu_env,
3189                               offsetof(CPUX86State,fpregs[reg].mmx));
3190                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3191            } else
3192#endif
3193            {
3194                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3195                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3196                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3197            }
3198            break;
3199        case 0x17e: /* movd ea, xmm */
3200#ifdef TARGET_X86_64
3201            if (s->dflag == MO_64) {
3202                tcg_gen_ld_i64(cpu_T0, cpu_env,
3203                               offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3204                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3205            } else
3206#endif
3207            {
3208                tcg_gen_ld32u_tl(cpu_T0, cpu_env,
3209                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3210                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3211            }
3212            break;
3213        case 0x27e: /* movq xmm, ea */
3214            if (mod != 3) {
3215                gen_lea_modrm(env, s, modrm);
3216                gen_ldq_env_A0(s, offsetof(CPUX86State,
3217                                           xmm_regs[reg].ZMM_Q(0)));
3218            } else {
3219                rm = (modrm & 7) | REX_B(s);
3220                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3221                            offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3222            }
3223            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3224            break;
3225        case 0x7f: /* movq ea, mm */
3226            if (mod != 3) {
3227                gen_lea_modrm(env, s, modrm);
3228                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3229            } else {
3230                rm = (modrm & 7);
3231                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3232                            offsetof(CPUX86State,fpregs[reg].mmx));
3233            }
3234            break;
3235        case 0x011: /* movups */
3236        case 0x111: /* movupd */
3237        case 0x029: /* movaps */
3238        case 0x129: /* movapd */
3239        case 0x17f: /* movdqa ea, xmm */
3240        case 0x27f: /* movdqu ea, xmm */
3241            if (mod != 3) {
3242                gen_lea_modrm(env, s, modrm);
3243                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3244            } else {
3245                rm = (modrm & 7) | REX_B(s);
3246                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3247                            offsetof(CPUX86State,xmm_regs[reg]));
3248            }
3249            break;
3250        case 0x211: /* movss ea, xmm */
3251            if (mod != 3) {
3252                gen_lea_modrm(env, s, modrm);
3253                tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3254                gen_op_st_v(s, MO_32, cpu_T0, cpu_A0);
3255            } else {
3256                rm = (modrm & 7) | REX_B(s);
3257                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)),
3258                            offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3259            }
3260            break;
3261        case 0x311: /* movsd ea, xmm */
3262            if (mod != 3) {
3263                gen_lea_modrm(env, s, modrm);
3264                gen_stq_env_A0(s, offsetof(CPUX86State,
3265                                           xmm_regs[reg].ZMM_Q(0)));
3266            } else {
3267                rm = (modrm & 7) | REX_B(s);
3268                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3269                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3270            }
3271            break;
3272        case 0x013: /* movlps */
3273        case 0x113: /* movlpd */
3274            if (mod != 3) {
3275                gen_lea_modrm(env, s, modrm);
3276                gen_stq_env_A0(s, offsetof(CPUX86State,
3277                                           xmm_regs[reg].ZMM_Q(0)));
3278            } else {
3279                goto illegal_op;
3280            }
3281            break;
3282        case 0x017: /* movhps */
3283        case 0x117: /* movhpd */
3284            if (mod != 3) {
3285                gen_lea_modrm(env, s, modrm);
3286                gen_stq_env_A0(s, offsetof(CPUX86State,
3287                                           xmm_regs[reg].ZMM_Q(1)));
3288            } else {
3289                goto illegal_op;
3290            }
3291            break;
3292        case 0x71: /* shift mm, im */
3293        case 0x72:
3294        case 0x73:
3295        case 0x171: /* shift xmm, im */
3296        case 0x172:
3297        case 0x173:
3298            if (b1 >= 2) {
3299                goto unknown_op;
3300            }
3301            val = cpu_ldub_code(env, s->pc++);
3302            if (is_xmm) {
3303                tcg_gen_movi_tl(cpu_T0, val);
3304                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3305                tcg_gen_movi_tl(cpu_T0, 0);
3306                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(1)));
3307                op1_offset = offsetof(CPUX86State,xmm_t0);
3308            } else {
3309                tcg_gen_movi_tl(cpu_T0, val);
3310                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3311                tcg_gen_movi_tl(cpu_T0, 0);
3312                tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3313                op1_offset = offsetof(CPUX86State,mmx_t0);
3314            }
3315            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3316                                       ((modrm >> 3) & 7)][b1];
3317            if (!sse_fn_epp) {
3318                goto unknown_op;
3319            }
3320            if (is_xmm) {
3321                rm = (modrm & 7) | REX_B(s);
3322                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3323            } else {
3324                rm = (modrm & 7);
3325                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3326            }
3327            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3328            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3329            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3330            break;
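            /* Illustrative decode of the table lookup above: the row is
               ((b - 1) & 3) * 8 plus the modrm /r field, so psrlw mm, imm8
               (0f 71 /2) selects row 2 and psrlq (0f 73 /2) row 18, while
               b1 picks the mmx (0) or 66-prefixed xmm (1) column.  */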
3331        case 0x050: /* movmskps */
3332            rm = (modrm & 7) | REX_B(s);
3333            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3334                             offsetof(CPUX86State,xmm_regs[rm]));
3335            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3336            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3337            break;
3338        case 0x150: /* movmskpd */
3339            rm = (modrm & 7) | REX_B(s);
3340            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3341                             offsetof(CPUX86State,xmm_regs[rm]));
3342            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3343            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3344            break;
3345        case 0x02a: /* cvtpi2ps */
3346        case 0x12a: /* cvtpi2pd */
3347            gen_helper_enter_mmx(cpu_env);
3348            if (mod != 3) {
3349                gen_lea_modrm(env, s, modrm);
3350                op2_offset = offsetof(CPUX86State,mmx_t0);
3351                gen_ldq_env_A0(s, op2_offset);
3352            } else {
3353                rm = (modrm & 7);
3354                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3355            }
3356            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3357            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3358            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3359            switch(b >> 8) {
3360            case 0x0:
3361                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3362                break;
3363            default:
3364            case 0x1:
3365                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3366                break;
3367            }
3368            break;
3369        case 0x22a: /* cvtsi2ss */
3370        case 0x32a: /* cvtsi2sd */
3371            ot = mo_64_32(s->dflag);
3372            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3373            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3374            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3375            if (ot == MO_32) {
3376                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3377                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3378                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3379            } else {
3380#ifdef TARGET_X86_64
3381                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3382                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T0);
3383#else
3384                goto illegal_op;
3385#endif
3386            }
3387            break;
3388        case 0x02c: /* cvttps2pi */
3389        case 0x12c: /* cvttpd2pi */
3390        case 0x02d: /* cvtps2pi */
3391        case 0x12d: /* cvtpd2pi */
3392            gen_helper_enter_mmx(cpu_env);
3393            if (mod != 3) {
3394                gen_lea_modrm(env, s, modrm);
3395                op2_offset = offsetof(CPUX86State,xmm_t0);
3396                gen_ldo_env_A0(s, op2_offset);
3397            } else {
3398                rm = (modrm & 7) | REX_B(s);
3399                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3400            }
3401            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3402            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3403            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3404            switch(b) {
3405            case 0x02c:
3406                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3407                break;
3408            case 0x12c:
3409                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3410                break;
3411            case 0x02d:
3412                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3413                break;
3414            case 0x12d:
3415                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3416                break;
3417            }
3418            break;
3419        case 0x22c: /* cvttss2si */
3420        case 0x32c: /* cvttsd2si */
3421        case 0x22d: /* cvtss2si */
3422        case 0x32d: /* cvtsd2si */
3423            ot = mo_64_32(s->dflag);
3424            if (mod != 3) {
3425                gen_lea_modrm(env, s, modrm);
3426                if ((b >> 8) & 1) {
3427                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3428                } else {
3429                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
3430                    tcg_gen_st32_tl(cpu_T0, cpu_env, offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
3431                }
3432                op2_offset = offsetof(CPUX86State,xmm_t0);
3433            } else {
3434                rm = (modrm & 7) | REX_B(s);
3435                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3436            }
3437            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3438            if (ot == MO_32) {
3439                SSEFunc_i_ep sse_fn_i_ep =
3440                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3441                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3442                tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
3443            } else {
3444#ifdef TARGET_X86_64
3445                SSEFunc_l_ep sse_fn_l_ep =
3446                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3447                sse_fn_l_ep(cpu_T0, cpu_env, cpu_ptr0);
3448#else
3449                goto illegal_op;
3450#endif
3451            }
3452            gen_op_mov_reg_v(ot, reg, cpu_T0);
3453            break;
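            /* Illustrative: the index ((b >> 7) & 2) | (b & 1) maps
               0x22c/0x22d/0x32c/0x32d to 0/1/2/3, i.e. bit 1 selects the
               f2 (sd) source and bit 0 the rounding (non-truncating)
               variant.  */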
3454        case 0xc4: /* pinsrw */
3455        case 0x1c4:
3456            s->rip_offset = 1;
3457            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3458            val = cpu_ldub_code(env, s->pc++);
3459            if (b1) {
3460                val &= 7;
3461                tcg_gen_st16_tl(cpu_T0, cpu_env,
3462                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3463            } else {
3464                val &= 3;
3465                tcg_gen_st16_tl(cpu_T0, cpu_env,
3466                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3467            }
3468            break;
3469        case 0xc5: /* pextrw */
3470        case 0x1c5:
3471            if (mod != 3)
3472                goto illegal_op;
3473            ot = mo_64_32(s->dflag);
3474            val = cpu_ldub_code(env, s->pc++);
3475            if (b1) {
3476                val &= 7;
3477                rm = (modrm & 7) | REX_B(s);
3478                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3479                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3480            } else {
3481                val &= 3;
3482                rm = (modrm & 7);
3483                tcg_gen_ld16u_tl(cpu_T0, cpu_env,
3484                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3485            }
3486            reg = ((modrm >> 3) & 7) | rex_r;
3487            gen_op_mov_reg_v(ot, reg, cpu_T0);
3488            break;
3489        case 0x1d6: /* movq ea, xmm */
3490            if (mod != 3) {
3491                gen_lea_modrm(env, s, modrm);
3492                gen_stq_env_A0(s, offsetof(CPUX86State,
3493                                           xmm_regs[reg].ZMM_Q(0)));
3494            } else {
3495                rm = (modrm & 7) | REX_B(s);
3496                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)),
3497                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3498                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3499            }
3500            break;
3501        case 0x2d6: /* movq2dq */
3502            gen_helper_enter_mmx(cpu_env);
3503            rm = (modrm & 7);
3504            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)),
3505                        offsetof(CPUX86State,fpregs[rm].mmx));
3506            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(1)));
3507            break;
3508        case 0x3d6: /* movdq2q */
3509            gen_helper_enter_mmx(cpu_env);
3510            rm = (modrm & 7) | REX_B(s);
3511            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3512                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3513            break;
3514        case 0xd7: /* pmovmskb */
3515        case 0x1d7:
3516            if (mod != 3)
3517                goto illegal_op;
3518            if (b1) {
3519                rm = (modrm & 7) | REX_B(s);
3520                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3521                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3522            } else {
3523                rm = (modrm & 7);
3524                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3525                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3526            }
3527            reg = ((modrm >> 3) & 7) | rex_r;
3528            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3529            break;
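            /* Note: pmovmskb gathers the top bit of each byte, so the mmx
               form produces an 8-bit mask and the xmm form a 16-bit mask
               in the low bits of the destination register.  */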
3530
3531        case 0x138:
3532        case 0x038:
3533            b = modrm;
3534            if ((b & 0xf0) == 0xf0) {
3535                goto do_0f_38_fx;
3536            }
3537            modrm = cpu_ldub_code(env, s->pc++);
3538            rm = modrm & 7;
3539            reg = ((modrm >> 3) & 7) | rex_r;
3540            mod = (modrm >> 6) & 3;
3541            if (b1 >= 2) {
3542                goto unknown_op;
3543            }
3544
3545            sse_fn_epp = sse_op_table6[b].op[b1];
3546            if (!sse_fn_epp) {
3547                goto unknown_op;
3548            }
3549            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3550                goto illegal_op;
3551
3552            if (b1) {
3553                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3554                if (mod == 3) {
3555                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3556                } else {
3557                    op2_offset = offsetof(CPUX86State,xmm_t0);
3558                    gen_lea_modrm(env, s, modrm);
3559                    switch (b) {
3560                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3561                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3562                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3563                        gen_ldq_env_A0(s, op2_offset +
3564                                        offsetof(ZMMReg, ZMM_Q(0)));
3565                        break;
3566                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3567                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3568                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3569                                            s->mem_index, MO_LEUL);
3570                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3571                                        offsetof(ZMMReg, ZMM_L(0)));
3572                        break;
3573                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3574                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3575                                           s->mem_index, MO_LEUW);
3576                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3577                                        offsetof(ZMMReg, ZMM_W(0)));
3578                        break;
3579                    case 0x2a:            /* movntdqa */
3580                        gen_ldo_env_A0(s, op1_offset);
3581                        return;
3582                    default:
3583                        gen_ldo_env_A0(s, op2_offset);
3584                    }
3585                }
3586            } else {
3587                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3588                if (mod == 3) {
3589                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3590                } else {
3591                    op2_offset = offsetof(CPUX86State,mmx_t0);
3592                    gen_lea_modrm(env, s, modrm);
3593                    gen_ldq_env_A0(s, op2_offset);
3594                }
3595            }
3596            if (sse_fn_epp == SSE_SPECIAL) {
3597                goto unknown_op;
3598            }
3599
3600            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3601            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3602            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3603
3604            if (b == 0x17) {
3605                set_cc_op(s, CC_OP_EFLAGS);
3606            }
3607            break;
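            /* Note: the pmovsx/pmovzx cases above deliberately load only
               as many bytes as the insn consumes (8, 4 or 2) instead of a
               full 16 bytes; b == 0x17 (ptest) is the only insn handled
               here that writes EFLAGS.  */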
3608
3609        case 0x238:
3610        case 0x338:
3611        do_0f_38_fx:
3612            /* Various integer extensions at 0f 38 f[0-f].  */
3613            b = modrm | (b1 << 8);
3614            modrm = cpu_ldub_code(env, s->pc++);
3615            reg = ((modrm >> 3) & 7) | rex_r;
3616
3617            switch (b) {
3618            case 0x3f0: /* crc32 Gd,Eb */
3619            case 0x3f1: /* crc32 Gd,Ey */
3620            do_crc32:
3621                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3622                    goto illegal_op;
3623                }
3624                if ((b & 0xff) == 0xf0) {
3625                    ot = MO_8;
3626                } else if (s->dflag != MO_64) {
3627                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3628                } else {
3629                    ot = MO_64;
3630                }
3631
3632                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3633                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3634                gen_helper_crc32(cpu_T0, cpu_tmp2_i32,
3635                                 cpu_T0, tcg_const_i32(8 << ot));
3636
3637                ot = mo_64_32(s->dflag);
3638                gen_op_mov_reg_v(ot, reg, cpu_T0);
3639                break;
3640
3641            case 0x1f0: /* crc32 or movbe */
3642            case 0x1f1:
3643                /* For these insns, the f3 prefix is supposed to have
3644                   priority over the 66 prefix, but that's not what we
3645                   implemented above when setting b1.  */
3646                if (s->prefix & PREFIX_REPNZ) {
3647                    goto do_crc32;
3648                }
3649                /* FALLTHRU */
3650            case 0x0f0: /* movbe Gy,My */
3651            case 0x0f1: /* movbe My,Gy */
3652                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3653                    goto illegal_op;
3654                }
3655                if (s->dflag != MO_64) {
3656                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3657                } else {
3658                    ot = MO_64;
3659                }
3660
3661                gen_lea_modrm(env, s, modrm);
3662                if ((b & 1) == 0) {
3663                    tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
3664                                       s->mem_index, ot | MO_BE);
3665                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3666                } else {
3667                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3668                                       s->mem_index, ot | MO_BE);
3669                }
3670                break;
3671
3672            case 0x0f2: /* andn Gy, By, Ey */
3673                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3674                    || !(s->prefix & PREFIX_VEX)
3675                    || s->vex_l != 0) {
3676                    goto illegal_op;
3677                }
3678                ot = mo_64_32(s->dflag);
3679                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3680                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
3681                gen_op_mov_reg_v(ot, reg, cpu_T0);
3682                gen_op_update1_cc();
3683                set_cc_op(s, CC_OP_LOGICB + ot);
3684                break;
3685
3686            case 0x0f7: /* bextr Gy, Ey, By */
3687                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3688                    || !(s->prefix & PREFIX_VEX)
3689                    || s->vex_l != 0) {
3690                    goto illegal_op;
3691                }
3692                ot = mo_64_32(s->dflag);
3693                {
3694                    TCGv bound, zero;
3695
3696                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3697                    /* Extract START, and shift the operand.
3698                       Shifts larger than operand size get zeros.  */
3699                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3700                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_A0);
3701
3702                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3703                    zero = tcg_const_tl(0);
3704                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T0, cpu_A0, bound,
3705                                       cpu_T0, zero);
3706                    tcg_temp_free(zero);
3707
3708                    /* Extract the LEN into a mask.  Lengths larger than
3709                       operand size get all ones.  */
3710                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
3711                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
3712                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3713                                       cpu_A0, bound);
3714                    tcg_temp_free(bound);
3715                    tcg_gen_movi_tl(cpu_T1, 1);
3716                    tcg_gen_shl_tl(cpu_T1, cpu_T1, cpu_A0);
3717                    tcg_gen_subi_tl(cpu_T1, cpu_T1, 1);
3718                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3719
3720                    gen_op_mov_reg_v(ot, reg, cpu_T0);
3721                    gen_op_update1_cc();
3722                    set_cc_op(s, CC_OP_LOGICB + ot);
3723                }
3724                break;
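                /* Worked example: with By = 0x0408 (LEN = 4 in bits [15:8],
                   START = 8 in bits [7:0]) and Ey = 0xabcd, bextr computes
                   (0xabcd >> 8) & ((1 << 4) - 1) = 0xb.  */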
3725
3726            case 0x0f5: /* bzhi Gy, Ey, By */
3727                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3728                    || !(s->prefix & PREFIX_VEX)
3729                    || s->vex_l != 0) {
3730                    goto illegal_op;
3731                }
3732                ot = mo_64_32(s->dflag);
3733                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3734                tcg_gen_ext8u_tl(cpu_T1, cpu_regs[s->vex_v]);
3735                {
3736                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3737                    /* Note that since we're using BMILG (in order to get O
3738                       cleared) we need to store the inverse into C.  */
3739                    tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src,
3740                                       cpu_T1, bound);
3741                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T1, cpu_T1,
3742                                       bound, bound, cpu_T1);
3743                    tcg_temp_free(bound);
3744                }
3745                tcg_gen_movi_tl(cpu_A0, -1);
3746                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T1);
3747                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_A0);
3748                gen_op_mov_reg_v(ot, reg, cpu_T0);
3749                gen_op_update1_cc();
3750                set_cc_op(s, CC_OP_BMILGB + ot);
3751                break;
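                /* Worked example: bzhi with a start index of 8 in By keeps
                   only bits [7:0] of Ey, so 0xabcd becomes 0xcd.  The
                   movcond above clamps the index so the TCG shift count
                   stays within the defined range.  */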
3752
3753            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3754                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3755                    || !(s->prefix & PREFIX_VEX)
3756                    || s->vex_l != 0) {
3757                    goto illegal_op;
3758                }
3759                ot = mo_64_32(s->dflag);
3760                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3761                switch (ot) {
3762                default:
3763                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
3764                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3765                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3766                                      cpu_tmp2_i32, cpu_tmp3_i32);
3767                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3768                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3769                    break;
3770#ifdef TARGET_X86_64
3771                case MO_64:
3772                    tcg_gen_mulu2_i64(cpu_T0, cpu_T1,
3773                                      cpu_T0, cpu_regs[R_EDX]);
3774                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T0);
3775                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T1);
3776                    break;
3777#endif
3778                }
3779                break;
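                /* Illustrative: mulx is a flag-less widening unsigned
                   multiply by rDX; e.g. for 32-bit operands,
                   0x80000000 * 0x80000000 leaves 0x40000000 in the high
                   destination (modrm reg) and 0 in the low destination
                   (the vvvv register).  */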
3780
3781            case 0x3f5: /* pdep Gy, By, Ey */
3782                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3783                    || !(s->prefix & PREFIX_VEX)
3784                    || s->vex_l != 0) {
3785                    goto illegal_op;
3786                }
3787                ot = mo_64_32(s->dflag);
3788                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3789                /* The deposited bits come from the vvvv register and
3790                   the mask is the r/m operand in cpu_T0.  Note that by
3791                   zero-extending the mask, we also zero-extend the result.  */
3792                if (ot != MO_64) {
3793                    tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3794                }
3795                tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3796                gen_helper_pdep(cpu_regs[reg], cpu_T1, cpu_T0);
3797                break;
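                /* Worked example: depositing the low bits of 0b101 at the
                   set-bit positions of mask 0b11010 yields 0b10010 (bit 0
                   goes to position 1, bit 1 to position 3, bit 2 to
                   position 4).  */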
3798
3799            case 0x2f5: /* pext Gy, By, Ey */
3800                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3801                    || !(s->prefix & PREFIX_VEX)
3802                    || s->vex_l != 0) {
3803                    goto illegal_op;
3804                }
3805                ot = mo_64_32(s->dflag);
3806                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3807                /* The extracted bits come from the vvvv register and
3808                   the mask is the r/m operand in cpu_T0.  Note that by
3809                   zero-extending the mask, we also zero-extend the result.  */
3810                if (ot != MO_64) {
3811                    tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3812                }
3813                tcg_gen_mov_tl(cpu_T1, cpu_regs[s->vex_v]);
3814                gen_helper_pext(cpu_regs[reg], cpu_T1, cpu_T0);
3815                break;
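                /* Worked example: pext is the inverse gather; extracting
                   the bits of 0b10010 at the set-bit positions of mask
                   0b11010 packs them back into 0b101.  */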
3816
3817            case 0x1f6: /* adcx Gy, Ey */
3818            case 0x2f6: /* adox Gy, Ey */
3819                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3820                    goto illegal_op;
3821                } else {
3822                    TCGv carry_in, carry_out, zero;
3823                    int end_op;
3824
3825                    ot = mo_64_32(s->dflag);
3826                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3827
3828                    /* Re-use the carry-out from a previous round.  */
3829                    TCGV_UNUSED(carry_in);
3830                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3831                    switch (s->cc_op) {
3832                    case CC_OP_ADCX:
3833                        if (b == 0x1f6) {
3834                            carry_in = cpu_cc_dst;
3835                            end_op = CC_OP_ADCX;
3836                        } else {
3837                            end_op = CC_OP_ADCOX;
3838                        }
3839                        break;
3840                    case CC_OP_ADOX:
3841                        if (b == 0x1f6) {
3842                            end_op = CC_OP_ADCOX;
3843                        } else {
3844                            carry_in = cpu_cc_src2;
3845                            end_op = CC_OP_ADOX;
3846                        }
3847                        break;
3848                    case CC_OP_ADCOX:
3849                        end_op = CC_OP_ADCOX;
3850                        carry_in = carry_out;
3851                        break;
3852                    default:
3853                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3854                        break;
3855                    }
3856                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3857                    if (TCGV_IS_UNUSED(carry_in)) {
3858                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3859                            gen_compute_eflags(s);
3860                        }
3861                        carry_in = cpu_tmp0;
3862                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
3863                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
3864                        tcg_gen_andi_tl(carry_in, carry_in, 1);
3865                    }
3866
3867                    switch (ot) {
3868#ifdef TARGET_X86_64
3869                    case MO_32:
3870                        /* If we know TL is 64-bit, and we want a 32-bit
3871                           result, just do everything in 64-bit arithmetic.  */
3872                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3873                        tcg_gen_ext32u_i64(cpu_T0, cpu_T0);
3874                        tcg_gen_add_i64(cpu_T0, cpu_T0, cpu_regs[reg]);
3875                        tcg_gen_add_i64(cpu_T0, cpu_T0, carry_in);
3876                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T0);
3877                        tcg_gen_shri_i64(carry_out, cpu_T0, 32);
3878                        break;
3879#endif
3880                    default:
3881                        /* Otherwise compute the carry-out in two steps.  */
3882                        zero = tcg_const_tl(0);
3883                        tcg_gen_add2_tl(cpu_T0, carry_out,
3884                                        cpu_T0, zero,
3885                                        carry_in, zero);
3886                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3887                                        cpu_regs[reg], carry_out,
3888                                        cpu_T0, zero);
3889                        tcg_temp_free(zero);
3890                        break;
3891                    }
3892                    set_cc_op(s, end_op);
3893                }
3894                break;
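                /* Note: adcx adds with CF only and adox with OF only, so
                   two independent carry chains can be interleaved, e.g. in
                   multi-precision multiplies; CC_OP_ADCX/ADOX/ADCOX record
                   which carry is live so a following adcx/adox can reuse
                   it without materializing all of EFLAGS.  */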
3895
3896            case 0x1f7: /* shlx Gy, Ey, By */
3897            case 0x2f7: /* sarx Gy, Ey, By */
3898            case 0x3f7: /* shrx Gy, Ey, By */
3899                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3900                    || !(s->prefix & PREFIX_VEX)
3901                    || s->vex_l != 0) {
3902                    goto illegal_op;
3903                }
3904                ot = mo_64_32(s->dflag);
3905                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3906                if (ot == MO_64) {
3907                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 63);
3908                } else {
3909                    tcg_gen_andi_tl(cpu_T1, cpu_regs[s->vex_v], 31);
3910                }
3911                if (b == 0x1f7) {
3912                    tcg_gen_shl_tl(cpu_T0, cpu_T0, cpu_T1);
3913                } else if (b == 0x2f7) {
3914                    if (ot != MO_64) {
3915                        tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
3916                    }
3917                    tcg_gen_sar_tl(cpu_T0, cpu_T0, cpu_T1);
3918                } else {
3919                    if (ot != MO_64) {
3920                        tcg_gen_ext32u_tl(cpu_T0, cpu_T0);
3921                    }
3922                    tcg_gen_shr_tl(cpu_T0, cpu_T0, cpu_T1);
3923                }
3924                gen_op_mov_reg_v(ot, reg, cpu_T0);
3925                break;
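                /* Note: as on hardware the shift count is masked to 5 bits
                   for 32-bit and 6 bits for 64-bit operands, and unlike
                   shl/sar/shr these forms leave the flags untouched (no
                   set_cc_op here).  */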
3926
3927            case 0x0f3:
3928            case 0x1f3:
3929            case 0x2f3:
3930            case 0x3f3: /* Group 17 */
3931                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3932                    || !(s->prefix & PREFIX_VEX)
3933                    || s->vex_l != 0) {
3934                    goto illegal_op;
3935                }
3936                ot = mo_64_32(s->dflag);
3937                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3938
3939                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
3940                switch (reg & 7) {
3941                case 1: /* blsr By,Ey */
3942                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
3943                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3944                    break;
3945
3946                case 2: /* blsmsk By,Ey */
3947                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
3948                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
3949                    break;
3950
3951                case 3: /* blsi By, Ey */
3952                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
3953                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
3954                    break;
3955
3956                default:
3957                    goto unknown_op;
3958                }
3959                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
3960                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
3961                set_cc_op(s, CC_OP_BMILGB + ot);
3962                break;
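                /* Illustrative identities, for x = 0b10100:
                   blsr:   x & (x - 1) = 0b10000 (clear lowest set bit)
                   blsmsk: x ^ (x - 1) = 0b00111 (mask through lowest set bit)
                   blsi:   x & -x      = 0b00100 (isolate lowest set bit)  */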
3968
3969            default:
3970                goto unknown_op;
3971            }
3972            break;
3973
3974        case 0x03a:
3975        case 0x13a:
3976            b = modrm;
3977            modrm = cpu_ldub_code(env, s->pc++);
3978            rm = modrm & 7;
3979            reg = ((modrm >> 3) & 7) | rex_r;
3980            mod = (modrm >> 6) & 3;
3981            if (b1 >= 2) {
3982                goto unknown_op;
3983            }
3984
3985            sse_fn_eppi = sse_op_table7[b].op[b1];
3986            if (!sse_fn_eppi) {
3987                goto unknown_op;
3988            }
3989            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
3990                goto illegal_op;
3991
3992            if (sse_fn_eppi == SSE_SPECIAL) {
3993                ot = mo_64_32(s->dflag);
3994                rm = (modrm & 7) | REX_B(s);
3995                if (mod != 3)
3996                    gen_lea_modrm(env, s, modrm);
3997                reg = ((modrm >> 3) & 7) | rex_r;
3998                val = cpu_ldub_code(env, s->pc++);
3999                switch (b) {
4000                case 0x14: /* pextrb */
4001                    tcg_gen_ld8u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4002                                            xmm_regs[reg].ZMM_B(val & 15)));
4003                    if (mod == 3) {
4004                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4005                    } else {
4006                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4007                                           s->mem_index, MO_UB);
4008                    }
4009                    break;
4010                case 0x15: /* pextrw */
4011                    tcg_gen_ld16u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4012                                            xmm_regs[reg].ZMM_W(val & 7)));
4013                    if (mod == 3) {
4014                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4015                    } else {
4016                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4017                                           s->mem_index, MO_LEUW);
4018                    }
4019                    break;
4020                case 0x16:
4021                    if (ot == MO_32) { /* pextrd */
4022                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4023                                        offsetof(CPUX86State,
4024                                                xmm_regs[reg].ZMM_L(val & 3)));
4025                        if (mod == 3) {
4026                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4027                        } else {
4028                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4029                                                s->mem_index, MO_LEUL);
4030                        }
4031                    } else { /* pextrq */
4032#ifdef TARGET_X86_64
4033                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4034                                        offsetof(CPUX86State,
4035                                                xmm_regs[reg].ZMM_Q(val & 1)));
4036                        if (mod == 3) {
4037                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4038                        } else {
4039                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4040                                                s->mem_index, MO_LEQ);
4041                        }
4042#else
4043                        goto illegal_op;
4044#endif
4045                    }
4046                    break;
4047                case 0x17: /* extractps */
4048                    tcg_gen_ld32u_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4049                                            xmm_regs[reg].ZMM_L(val & 3)));
4050                    if (mod == 3) {
4051                        gen_op_mov_reg_v(ot, rm, cpu_T0);
4052                    } else {
4053                        tcg_gen_qemu_st_tl(cpu_T0, cpu_A0,
4054                                           s->mem_index, MO_LEUL);
4055                    }
4056                    break;
4057                case 0x20: /* pinsrb */
4058                    if (mod == 3) {
4059                        gen_op_mov_v_reg(MO_32, cpu_T0, rm);
4060                    } else {
4061                        tcg_gen_qemu_ld_tl(cpu_T0, cpu_A0,
4062                                           s->mem_index, MO_UB);
4063                    }
4064                    tcg_gen_st8_tl(cpu_T0, cpu_env, offsetof(CPUX86State,
4065                                            xmm_regs[reg].ZMM_B(val & 15)));
4066                    break;
4067                case 0x21: /* insertps */
4068                    if (mod == 3) {
4069                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4070                                        offsetof(CPUX86State,xmm_regs[rm]
4071                                                .ZMM_L((val >> 6) & 3)));
4072                    } else {
4073                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4074                                            s->mem_index, MO_LEUL);
4075                    }
4076                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4077                                    offsetof(CPUX86State,xmm_regs[reg]
4078                                            .ZMM_L((val >> 4) & 3)));
4079                    if ((val >> 0) & 1)
4080                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4081                                        cpu_env, offsetof(CPUX86State,
4082                                                xmm_regs[reg].ZMM_L(0)));
4083                    if ((val >> 1) & 1)
4084                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4085                                        cpu_env, offsetof(CPUX86State,
4086                                                xmm_regs[reg].ZMM_L(1)));
4087                    if ((val >> 2) & 1)
4088                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4089                                        cpu_env, offsetof(CPUX86State,
4090                                                xmm_regs[reg].ZMM_L(2)));
4091                    if ((val >> 3) & 1)
4092                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4093                                        cpu_env, offsetof(CPUX86State,
4094                                                xmm_regs[reg].ZMM_L(3)));
4095                    break;
4096                case 0x22:
4097                    if (ot == MO_32) { /* pinsrd */
4098                        if (mod == 3) {
4099                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4100                        } else {
4101                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4102                                                s->mem_index, MO_LEUL);
4103                        }
4104                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4105                                        offsetof(CPUX86State,
4106                                                xmm_regs[reg].ZMM_L(val & 3)));
4107                    } else { /* pinsrq */
4108#ifdef TARGET_X86_64
4109                        if (mod == 3) {
4110                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4111                        } else {
4112                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4113                                                s->mem_index, MO_LEQ);
4114                        }
4115                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4116                                        offsetof(CPUX86State,
4117                                                xmm_regs[reg].ZMM_Q(val & 1)));
4118#else
4119                        goto illegal_op;
4120#endif
4121                    }
4122                    break;
4123                }
4124                return;
4125            }
4126
4127            if (b1) {
4128                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4129                if (mod == 3) {
4130                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4131                } else {
4132                    op2_offset = offsetof(CPUX86State,xmm_t0);
4133                    gen_lea_modrm(env, s, modrm);
4134                    gen_ldo_env_A0(s, op2_offset);
4135                }
4136            } else {
4137                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4138                if (mod == 3) {
4139                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4140                } else {
4141                    op2_offset = offsetof(CPUX86State,mmx_t0);
4142                    gen_lea_modrm(env, s, modrm);
4143                    gen_ldq_env_A0(s, op2_offset);
4144                }
4145            }
4146            val = cpu_ldub_code(env, s->pc++);
4147
4148            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4149                set_cc_op(s, CC_OP_EFLAGS);
4150
4151                if (s->dflag == MO_64) {
4152                    /* The helper must use the entire 64-bit gp registers */
4153                    val |= 1 << 8;
4154                }
4155            }
4156
4157            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4158            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4159            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4160            break;
4161
4162        case 0x33a:
4163            /* Various integer extensions at 0f 3a f[0-f].  */
4164            b = modrm | (b1 << 8);
4165            modrm = cpu_ldub_code(env, s->pc++);
4166            reg = ((modrm >> 3) & 7) | rex_r;
4167
4168            switch (b) {
4169            case 0x3f0: /* rorx Gy,Ey, Ib */
4170                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4171                    || !(s->prefix & PREFIX_VEX)
4172                    || s->vex_l != 0) {
4173                    goto illegal_op;
4174                }
4175                ot = mo_64_32(s->dflag);
4176                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4177                b = cpu_ldub_code(env, s->pc++);
4178                if (ot == MO_64) {
4179                    tcg_gen_rotri_tl(cpu_T0, cpu_T0, b & 63);
4180                } else {
4181                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
4182                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4183                    tcg_gen_extu_i32_tl(cpu_T0, cpu_tmp2_i32);
4184                }
4185                gen_op_mov_reg_v(ot, reg, cpu_T0);
4186                break;
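                /* Note: rorx rotates right by an immediate without
                   touching the flags; the count is masked to the operand
                   width above (b & 63 or b & 31).  */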
4187
4188            default:
4189                goto unknown_op;
4190            }
4191            break;
4192
4193        default:
4194        unknown_op:
4195            gen_unknown_opcode(env, s);
4196            return;
4197        }
4198    } else {
4199        /* generic MMX or SSE operation */
4200        switch(b) {
4201        case 0x70: /* pshufw/pshufd/pshuflw/pshufhw */
4202        case 0xc6: /* shufps/shufpd */
4203        case 0xc2: /* compare insns */
4204            s->rip_offset = 1;
4205            break;
4206        default:
4207            break;
4208        }
4209        if (is_xmm) {
4210            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4211            if (mod != 3) {
4212                int sz = 4;
4213
4214                gen_lea_modrm(env, s, modrm);
4215                op2_offset = offsetof(CPUX86State,xmm_t0);
4216
4217                switch (b) {
4218                case 0x50 ... 0x5a:
4219                case 0x5c ... 0x5f:
4220                case 0xc2:
4221                    /* Most sse scalar operations.  */
4222                    if (b1 == 2) {
4223                        sz = 2;
4224                    } else if (b1 == 3) {
4225                        sz = 3;
4226                    }
4227                    break;
4228
4229                case 0x2e:  /* ucomis[sd] */
4230                case 0x2f:  /* comis[sd] */
4231                    if (b1 == 0) {
4232                        sz = 2;
4233                    } else {
4234                        sz = 3;
4235                    }
4236                    break;
4237                }
4238
4239                switch (sz) {
4240                case 2:
4241                    /* 32 bit access */
4242                    gen_op_ld_v(s, MO_32, cpu_T0, cpu_A0);
4243                    tcg_gen_st32_tl(cpu_T0, cpu_env,
4244                                    offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4245                    break;
4246                case 3:
4247                    /* 64 bit access */
4248                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4249                    break;
4250                default:
4251                    /* 128 bit access */
4252                    gen_ldo_env_A0(s, op2_offset);
4253                    break;
4254                }
4255            } else {
4256                rm = (modrm & 7) | REX_B(s);
4257                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4258            }
4259        } else {
4260            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4261            if (mod != 3) {
4262                gen_lea_modrm(env, s, modrm);
4263                op2_offset = offsetof(CPUX86State,mmx_t0);
4264                gen_ldq_env_A0(s, op2_offset);
4265            } else {
4266                rm = (modrm & 7);
4267                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4268            }
4269        }
4270        switch(b) {
4271        case 0x0f: /* 3DNow! data insns */
4272            val = cpu_ldub_code(env, s->pc++);
4273            sse_fn_epp = sse_op_table5[val];
4274            if (!sse_fn_epp) {
4275                goto unknown_op;
4276            }
4277            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4278                goto illegal_op;
4279            }
4280            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4281            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4282            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4283            break;
4284        case 0x70: /* pshufw/pshufd/pshuflw/pshufhw */
4285        case 0xc6: /* shufps/shufpd */
4286            val = cpu_ldub_code(env, s->pc++);
4287            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4288            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4289            /* XXX: introduce a new table? */
4290            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4291            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4292            break;
4293        case 0xc2:
4294            /* compare insns */
4295            val = cpu_ldub_code(env, s->pc++);
4296            if (val >= 8)
4297                goto unknown_op;
4298            sse_fn_epp = sse_op_table4[val][b1];
4299
4300            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4301            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4302            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4303            break;
4304        case 0xf7:
4305            /* maskmovq/maskmovdqu: we must prepare the implicit DS:rDI address in A0 */
4306            if (mod != 3)
4307                goto illegal_op;
4308            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4309            gen_extu(s->aflag, cpu_A0);
4310            gen_add_A0_ds_seg(s);
4311
4312            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4313            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4314            /* XXX: introduce a new table? */
4315            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4316            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4317            break;
4318        default:
4319            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4320            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4321            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4322            break;
4323        }
4324        if (b == 0x2e || b == 0x2f) {
4325            set_cc_op(s, CC_OP_EFLAGS);
4326        }
4327    }
4328}
4329
4330/* convert one instruction. s->is_jmp is set if the translation must
4331   be stopped. Return the next pc value */
4332static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4333                               target_ulong pc_start)
4334{
4335    int b, prefixes;
4336    int shift;
4337    TCGMemOp ot, aflag, dflag;
4338    int modrm, reg, rm, mod, op, opreg, val;
4339    target_ulong next_eip, tval;
4340    int rex_w, rex_r;
4341
4342    s->pc_start = s->pc = pc_start;
4343    prefixes = 0;
4344    s->override = -1;
4345    rex_w = -1;
4346    rex_r = 0;
4347#ifdef TARGET_X86_64
4348    s->rex_x = 0;
4349    s->rex_b = 0;
4350    x86_64_hregs = 0;
4351#endif
4352    s->rip_offset = 0; /* for relative ip address */
4353    s->vex_l = 0;
4354    s->vex_v = 0;
4355 next_byte:
4356    b = cpu_ldub_code(env, s->pc);
4357    s->pc++;
4358    /* Collect prefixes.  */
4359    switch (b) {
4360    case 0xf3:
4361        prefixes |= PREFIX_REPZ;
4362        goto next_byte;
4363    case 0xf2:
4364        prefixes |= PREFIX_REPNZ;
4365        goto next_byte;
4366    case 0xf0:
4367        prefixes |= PREFIX_LOCK;
4368        goto next_byte;
4369    case 0x2e:
4370        s->override = R_CS;
4371        goto next_byte;
4372    case 0x36:
4373        s->override = R_SS;
4374        goto next_byte;
4375    case 0x3e:
4376        s->override = R_DS;
4377        goto next_byte;
4378    case 0x26:
4379        s->override = R_ES;
4380        goto next_byte;
4381    case 0x64:
4382        s->override = R_FS;
4383        goto next_byte;
4384    case 0x65:
4385        s->override = R_GS;
4386        goto next_byte;
4387    case 0x66:
4388        prefixes |= PREFIX_DATA;
4389        goto next_byte;
4390    case 0x67:
4391        prefixes |= PREFIX_ADR;
4392        goto next_byte;
4393#ifdef TARGET_X86_64
4394    case 0x40 ... 0x4f:
4395        if (CODE64(s)) {
4396            /* REX prefix */
4397            rex_w = (b >> 3) & 1;
4398            rex_r = (b & 0x4) << 1;
4399            s->rex_x = (b & 0x2) << 2;
4400            REX_B(s) = (b & 0x1) << 3;
4401            x86_64_hregs = 1; /* select uniform byte register addressing */
4402            goto next_byte;
4403        }
4404        break;
4405#endif
4406    case 0xc5: /* 2-byte VEX */
4407    case 0xc4: /* 3-byte VEX */
4408        /* VEX prefixes are not available in 16-bit, real or vm86 mode;
4409           there the 0xc4/0xc5 bytes decode as LES or LDS.  */
4410        if (s->code32 && !s->vm86) {
4411            static const int pp_prefix[4] = {
4412                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4413            };
4414            int vex3, vex2 = cpu_ldub_code(env, s->pc);
4415
4416            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4417                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4418                   otherwise the instruction is LES or LDS.  */
4419                break;
4420            }
4421            s->pc++;
4422
4423            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4424            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4425                            | PREFIX_LOCK | PREFIX_DATA)) {
4426                goto illegal_op;
4427            }
4428#ifdef TARGET_X86_64
4429            if (x86_64_hregs) {
4430                goto illegal_op;
4431            }
4432#endif
4433            rex_r = (~vex2 >> 4) & 8;
4434            if (b == 0xc5) {
4435                vex3 = vex2;
4436                b = cpu_ldub_code(env, s->pc++) | 0x100;
4437            } else {
4438#ifdef TARGET_X86_64
4439                s->rex_x = (~vex2 >> 3) & 8;
4440                s->rex_b = (~vex2 >> 2) & 8;
4441#endif
4442                vex3 = cpu_ldub_code(env, s->pc++);
4443                rex_w = (vex3 >> 7) & 1;
4444                switch (vex2 & 0x1f) {
4445                case 0x01: /* Implied 0f leading opcode bytes.  */
4446                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4447                    break;
4448                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4449                    b = 0x138;
4450                    break;
4451                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4452                    b = 0x13a;
4453                    break;
4454                default:   /* Reserved for future use.  */
4455                    goto unknown_op;
4456                }
4457            }
4458            s->vex_v = (~vex3 >> 3) & 0xf;
4459            s->vex_l = (vex3 >> 2) & 1;
4460            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4461        }
4462        break;
4463    }
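    /* Illustrative VEX decode (a sketch of the fields used above): the
       first payload byte of a 3-byte VEX holds the inverted R/X/B bits
       plus the opcode map, and the second holds W, inverted vvvv, L and
       pp.  E.g. a first byte of 0xe2 gives rex_r = ((~0xe2) >> 4) & 8 = 0
       and map 0xe2 & 0x1f = 0x02, the 0f 38 opcode space.  */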
4464
4465    /* Post-process prefixes.  */
4466    if (CODE64(s)) {
4467        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4468           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4469           over 0x66 if both are present.  */
4470        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4471        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4472        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4473    } else {
4474        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4475        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4476            dflag = MO_32;
4477        } else {
4478            dflag = MO_16;
4479        }
4480        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4481        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4482            aflag = MO_32;
4483        } else {
4484            aflag = MO_16;
4485        }
4486    }
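    /* Illustrative: in 32-bit code an unprefixed insn thus gets
       dflag = aflag = MO_32, and a 0x66/0x67 prefix flips the respective
       size to MO_16; in 16-bit code the same prefixes flip MO_16 to
       MO_32.  */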
4487
4488    s->prefix = prefixes;
4489    s->aflag = aflag;
4490    s->dflag = dflag;
4491
4492    /* lock generation */
4493    if (prefixes & PREFIX_LOCK)
4494        gen_helper_lock();
4495
4496    /* now check op code */
4497 reswitch:
4498    switch(b) {
4499    case 0x0f:
4500        /**************************/
4501        /* extended op code */
4502        b = cpu_ldub_code(env, s->pc++) | 0x100;
4503        goto reswitch;
4504
4505        /**************************/
4506        /* arith & logic */
4507    case 0x00 ... 0x05:
4508    case 0x08 ... 0x0d:
4509    case 0x10 ... 0x15:
4510    case 0x18 ... 0x1d:
4511    case 0x20 ... 0x25:
4512    case 0x28 ... 0x2d:
4513    case 0x30 ... 0x35:
4514    case 0x38 ... 0x3d:
4515        {
4516            int op, f, val;
4517            op = (b >> 3) & 7;
4518            f = (b >> 1) & 3;

            ot = mo_b_d(b, dflag);

            switch(f) {
            case 0: /* OP Ev, Gv */
                modrm = cpu_ldub_code(env, s->pc++);
                reg = ((modrm >> 3) & 7) | rex_r;
                mod = (modrm >> 6) & 3;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(env, s, modrm);
                    opreg = OR_TMP0;
                } else if (op == OP_XORL && rm == reg) {
                xor_zero:
                    /* xor reg, reg optimisation: the destination and the
                       flags state are statically known, so set CC_OP_CLR
                       and store zero directly instead of emitting the
                       ALU op.  */
                    set_cc_op(s, CC_OP_CLR);
                    tcg_gen_movi_tl(cpu_T0, 0);
                    gen_op_mov_reg_v(ot, reg, cpu_T0);
                    break;
                } else {
                    opreg = rm;
                }
                gen_op_mov_v_reg(ot, cpu_T1, reg);
                gen_op(s, op, ot, opreg);
                break;
            case 1: /* OP Gv, Ev */
                modrm = cpu_ldub_code(env, s->pc++);
                mod = (modrm >> 6) & 3;
                reg = ((modrm >> 3) & 7) | rex_r;
                rm = (modrm & 7) | REX_B(s);
                if (mod != 3) {
                    gen_lea_modrm(env, s, modrm);
                    gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
                } else if (op == OP_XORL && rm == reg) {
                    goto xor_zero;
                } else {
                    gen_op_mov_v_reg(ot, cpu_T1, rm);
                }
                gen_op(s, op, ot, reg);
                break;
            case 2: /* OP A, Iv */
                val = insn_get(env, s, ot);
                tcg_gen_movi_tl(cpu_T1, val);
                gen_op(s, op, ot, OR_EAX);
                break;
            }
        }
        break;

    case 0x82:
        if (CODE64(s))
            goto illegal_op;
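        /* fall through: 0x82 is a redundant encoding of 0x80 that is
           only valid outside 64-bit mode.  */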
    case 0x80: /* GRP1 */
    case 0x81:
    case 0x83:
        {
            int val;

            ot = mo_b_d(b, dflag);

            modrm = cpu_ldub_code(env, s->pc++);
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);
            op = (modrm >> 3) & 7;

            if (mod != 3) {
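                /* rip_offset is the number of immediate bytes that still
                   follow the modrm byte; gen_lea_modrm needs it so that
                   RIP-relative operands are computed relative to the end
                   of the instruction.  */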
                if (b == 0x83)
                    s->rip_offset = 1;
                else
                    s->rip_offset = insn_const_size(ot);
                gen_lea_modrm(env, s, modrm);
                opreg = OR_TMP0;
            } else {
                opreg = rm;
            }

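            /* 0x80/0x81 take a full-size immediate; 0x83 takes an 8-bit
               immediate that is sign-extended to the operand size.  */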
            switch(b) {
            default:
            case 0x80:
            case 0x81:
            case 0x82:
                val = insn_get(env, s, ot);
                break;
            case 0x83:
                val = (int8_t)insn_get(env, s, MO_8);
                break;
            }
            tcg_gen_movi_tl(cpu_T1, val);
            gen_op(s, op, ot, opreg);
        }
        break;

        /**************************/
        /* inc, dec, and other misc arith */
    case 0x40 ... 0x47: /* inc Gv */
        ot = dflag;
        gen_inc(s, ot, OR_EAX + (b & 7), 1);
        break;
    case 0x48 ... 0x4f: /* dec Gv */
        ot = dflag;
        gen_inc(s, ot, OR_EAX + (b & 7), -1);
        break;
    case 0xf6: /* GRP3 */
    case 0xf7:
        ot = mo_b_d(b, dflag);

        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (mod != 3) {
            if (op == 0)
                s->rip_offset = insn_const_size(ot);
            gen_lea_modrm(env, s, modrm);
            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
        } else {
            gen_op_mov_v_reg(ot, cpu_T0, rm);
        }

        switch(op) {
        case 0: /* test */
            val = insn_get(env, s, ot);
            tcg_gen_movi_tl(cpu_T1, val);
            gen_op_testl_T0_T1_cc();
            set_cc_op(s, CC_OP_LOGICB + ot);
            break;
        case 2: /* not */
            tcg_gen_not_tl(cpu_T0, cpu_T0);
            if (mod != 3) {
                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
            } else {
                gen_op_mov_reg_v(ot, rm, cpu_T0);
            }
            break;
        case 3: /* neg */
            tcg_gen_neg_tl(cpu_T0, cpu_T0);
            if (mod != 3) {
                gen_op_st_v(s, ot, cpu_T0, cpu_A0);
            } else {
                gen_op_mov_reg_v(ot, rm, cpu_T0);
            }
            gen_op_update_neg_cc();
            set_cc_op(s, CC_OP_SUBB + ot);
            break;
        case 4: /* mul */
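            /* Unsigned widening multiply: the double-width product goes
               into DX:AX (or EDX:EAX / RDX:RAX).  CF and OF are set iff
               the high half is non-zero, so the high half is kept in
               cc_src for the flags computation.  */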
            switch(ot) {
            case MO_8:
                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
                tcg_gen_ext8u_tl(cpu_T0, cpu_T0);
                tcg_gen_ext8u_tl(cpu_T1, cpu_T1);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                tcg_gen_andi_tl(cpu_cc_src, cpu_T0, 0xff00);
                set_cc_op(s, CC_OP_MULB);
                break;
            case MO_16:
                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
                tcg_gen_ext16u_tl(cpu_T1, cpu_T1);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                set_cc_op(s, CC_OP_MULW);
                break;
            default:
            case MO_32:
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                  cpu_tmp2_i32, cpu_tmp3_i32);
                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                set_cc_op(s, CC_OP_MULL);
                break;
#ifdef TARGET_X86_64
            case MO_64:
                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
                                  cpu_T0, cpu_regs[R_EAX]);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
                set_cc_op(s, CC_OP_MULQ);
                break;
#endif
            }
            break;
        case 5: /* imul */
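            /* Signed widening multiply.  CF and OF are set iff the product
               does not fit in the low half, i.e. when the high half differs
               from the sign extension of the low half; cc_src is set to
               that difference, which is non-zero exactly on overflow.  */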
            switch(ot) {
            case MO_8:
                gen_op_mov_v_reg(MO_8, cpu_T1, R_EAX);
                tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
                tcg_gen_ext8s_tl(cpu_T1, cpu_T1);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T0);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
                set_cc_op(s, CC_OP_MULB);
                break;
            case MO_16:
                gen_op_mov_v_reg(MO_16, cpu_T1, R_EAX);
                tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
                tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
                /* XXX: use 32 bit mul which could be faster */
                tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
                tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
                tcg_gen_shri_tl(cpu_T0, cpu_T0, 16);
                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
                set_cc_op(s, CC_OP_MULW);
                break;
            default:
            case MO_32:
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                                  cpu_tmp2_i32, cpu_tmp3_i32);
                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
                set_cc_op(s, CC_OP_MULL);
                break;
#ifdef TARGET_X86_64
            case MO_64:
                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
                                  cpu_T0, cpu_regs[R_EAX]);
                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
                set_cc_op(s, CC_OP_MULQ);
                break;
#endif
            }
            break;
        case 6: /* div */
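            /* div and idiv are left to helpers: they must raise #DE on
               division by zero or quotient overflow, which cannot be
               generated inline.  */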
            switch(ot) {
            case MO_8:
                gen_helper_divb_AL(cpu_env, cpu_T0);
                break;
            case MO_16:
                gen_helper_divw_AX(cpu_env, cpu_T0);
                break;
            default:
            case MO_32:
                gen_helper_divl_EAX(cpu_env, cpu_T0);
                break;
#ifdef TARGET_X86_64
            case MO_64:
                gen_helper_divq_EAX(cpu_env, cpu_T0);
                break;
#endif
            }
            break;
        case 7: /* idiv */
            switch(ot) {
            case MO_8:
                gen_helper_idivb_AL(cpu_env, cpu_T0);
                break;
            case MO_16:
                gen_helper_idivw_AX(cpu_env, cpu_T0);
                break;
            default:
            case MO_32:
                gen_helper_idivl_EAX(cpu_env, cpu_T0);
                break;
#ifdef TARGET_X86_64
            case MO_64:
                gen_helper_idivq_EAX(cpu_env, cpu_T0);
                break;
#endif
            }
            break;
        default:
            goto unknown_op;
        }
        break;

    case 0xfe: /* GRP4 */
    case 0xff: /* GRP5 */
        ot = mo_b_d(b, dflag);

        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        op = (modrm >> 3) & 7;
        if (op >= 2 && b == 0xfe) {
            goto unknown_op;
        }
        if (CODE64(s)) {
            if (op == 2 || op == 4) {
                /* operand size for jumps is 64 bit */
                ot = MO_64;
            } else if (op == 3 || op == 5) {
                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
            } else if (op == 6) {
                /* default push size is 64 bit */
                ot = mo_pushpop(s, dflag);
            }
        }
        if (mod != 3) {
            gen_lea_modrm(env, s, modrm);
            if (op >= 2 && op != 3 && op != 5)
                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
        } else {
            gen_op_mov_v_reg(ot, cpu_T0, rm);
        }

        switch(op) {
        case 0: /* inc Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, 1);
            break;
        case 1: /* dec Ev */
            if (mod != 3)
                opreg = OR_TMP0;
            else
                opreg = rm;
            gen_inc(s, ot, opreg, -1);
            break;
        case 2: /* call Ev */
            /* XXX: optimize if memory (no 'and' is necessary) */
            if (dflag == MO_16) {
                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
            }
            next_eip = s->pc - s->cs_base;
            tcg_gen_movi_tl(cpu_T1, next_eip);
            gen_push_v(s, cpu_T1);
            gen_op_jmp_v(cpu_T0);
            gen_bnd_jmp(s);
            gen_eob(s);
            break;
        case 3: /* lcall Ev */
            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
            gen_add_A0_im(s, 1 << ot);
            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
        do_lcall:
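            /* In protected mode the helper performs the privilege and
               gate checks; in real or vm86 mode the far pointer is
               installed directly.  */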
            if (s->pe && !s->vm86) {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                           tcg_const_i32(dflag - 1),
                                           tcg_const_tl(s->pc - s->cs_base));
            } else {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T1,
                                      tcg_const_i32(dflag - 1),
                                      tcg_const_i32(s->pc - s->cs_base));
            }
            gen_eob(s);
            break;
        case 4: /* jmp Ev */
            if (dflag == MO_16) {
                tcg_gen_ext16u_tl(cpu_T0, cpu_T0);
            }
            gen_op_jmp_v(cpu_T0);
            gen_bnd_jmp(s);
            gen_eob(s);
            break;
        case 5: /* ljmp Ev */
            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
            gen_add_A0_im(s, 1 << ot);
            gen_op_ld_v(s, MO_16, cpu_T0, cpu_A0);
        do_ljmp:
            if (s->pe && !s->vm86) {
                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T1,
                                          tcg_const_tl(s->pc - s->cs_base));
            } else {
                gen_op_movl_seg_T0_vm(R_CS);
                gen_op_jmp_v(cpu_T1);
            }
            gen_eob(s);
            break;
        case 6: /* push Ev */
            gen_push_v(s, cpu_T0);
            break;
        default:
            goto unknown_op;
        }
        break;

    case 0x84: /* test Ev, Gv */
    case 0x85:
        ot = mo_b_d(b, dflag);

        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;

        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_op_mov_v_reg(ot, cpu_T1, reg);
        gen_op_testl_T0_T1_cc();
        set_cc_op(s, CC_OP_LOGICB + ot);
        break;

    case 0xa8: /* test eAX, Iv */
    case 0xa9:
        ot = mo_b_d(b, dflag);
        val = insn_get(env, s, ot);

        gen_op_mov_v_reg(ot, cpu_T0, OR_EAX);
        tcg_gen_movi_tl(cpu_T1, val);
        gen_op_testl_T0_T1_cc();
        set_cc_op(s, CC_OP_LOGICB + ot);
        break;

    case 0x98: /* CWDE/CBW */
        switch (dflag) {
#ifdef TARGET_X86_64
        case MO_64:
            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T0);
            break;
#endif
        case MO_32:
            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T0);
            break;
        case MO_16:
            gen_op_mov_v_reg(MO_8, cpu_T0, R_EAX);
            tcg_gen_ext8s_tl(cpu_T0, cpu_T0);
            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T0);
            break;
        default:
            tcg_abort();
        }
        break;
    case 0x99: /* CDQ/CWD */
        switch (dflag) {
#ifdef TARGET_X86_64
        case MO_64:
            gen_op_mov_v_reg(MO_64, cpu_T0, R_EAX);
            tcg_gen_sari_tl(cpu_T0, cpu_T0, 63);
            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T0);
            break;
#endif
        case MO_32:
            gen_op_mov_v_reg(MO_32, cpu_T0, R_EAX);
            tcg_gen_ext32s_tl(cpu_T0, cpu_T0);
            tcg_gen_sari_tl(cpu_T0, cpu_T0, 31);
            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T0);
            break;
        case MO_16:
            gen_op_mov_v_reg(MO_16, cpu_T0, R_EAX);
            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
            tcg_gen_sari_tl(cpu_T0, cpu_T0, 15);
            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T0);
            break;
        default:
            tcg_abort();
        }
        break;
    case 0x1af: /* imul Gv, Ev */
    case 0x69: /* imul Gv, Ev, I */
    case 0x6b:
        ot = dflag;
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        if (b == 0x69)
            s->rip_offset = insn_const_size(ot);
        else if (b == 0x6b)
            s->rip_offset = 1;
        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        if (b == 0x69) {
            val = insn_get(env, s, ot);
            tcg_gen_movi_tl(cpu_T1, val);
        } else if (b == 0x6b) {
            val = (int8_t)insn_get(env, s, MO_8);
            tcg_gen_movi_tl(cpu_T1, val);
        } else {
            gen_op_mov_v_reg(ot, cpu_T1, reg);
        }
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_64:
            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T1, cpu_T0, cpu_T1);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T1);
            break;
#endif
        case MO_32:
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T0);
            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T1);
            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
                              cpu_tmp2_i32, cpu_tmp3_i32);
            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
            break;
        default:
            tcg_gen_ext16s_tl(cpu_T0, cpu_T0);
            tcg_gen_ext16s_tl(cpu_T1, cpu_T1);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(cpu_T0, cpu_T0, cpu_T1);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T0);
            tcg_gen_sub_tl(cpu_cc_src, cpu_T0, cpu_tmp0);
            gen_op_mov_reg_v(ot, reg, cpu_T0);
            break;
        }
        set_cc_op(s, CC_OP_MULB + ot);
        break;
    case 0x1c0:
    case 0x1c1: /* xadd Ev, Gv */
        ot = mo_b_d(b, dflag);
        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;
        mod = (modrm >> 6) & 3;
        if (mod == 3) {
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_v_reg(ot, cpu_T0, reg);
            gen_op_mov_v_reg(ot, cpu_T1, rm);
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_mov_reg_v(ot, reg, cpu_T1);
            gen_op_mov_reg_v(ot, rm, cpu_T0);
        } else {
            gen_lea_modrm(env, s, modrm);
            gen_op_mov_v_reg(ot, cpu_T0, reg);
            gen_op_ld_v(s, ot, cpu_T1, cpu_A0);
            tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1);
            gen_op_st_v(s, ot, cpu_T0, cpu_A0);
            gen_op_mov_reg_v(ot, reg, cpu_T1);
        }
        gen_op_update2_cc();
        set_cc_op(s, CC_OP_ADDB + ot);
        break;
    case 0x1b0:
    case 0x1b1: /* cmpxchg Ev, Gv */
        {
            TCGLabel *label1, *label2;
            TCGv t0, t1, t2, a0;

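            /* cmpxchg: compare the accumulator with the destination; if
               equal, the destination is replaced by the source, otherwise
               the accumulator is loaded from the destination.  Local temps
               are used because their values must survive the brcond
               below.  */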
            ot = mo_b_d(b, dflag);
            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            t0 = tcg_temp_local_new();
            t1 = tcg_temp_local_new();
            t2 = tcg_temp_local_new();
            a0 = tcg_temp_local_new();
            gen_op_mov_v_reg(ot, t1, reg);
            if (mod == 3) {
                rm = (modrm & 7) | REX_B(s);
                gen_op_mov_v_reg(ot, t0, rm);
            } else {
                gen_lea_modrm(env, s, modrm);
                tcg_gen_mov_tl(a0, cpu_A0);
                gen_op_ld_v(s, ot, t0, a0);
                rm = 0; /* avoid warning */
            }
            label1 = gen_new_label();
            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
            gen_extu(ot, t0);
            gen_extu(ot, t2);
            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
            label2 = gen_new_label();
            if (mod == 3) {
                gen_op_mov_reg_v(ot, R_EAX, t0);
                tcg_gen_br(label2);
                gen_set_label(label1);
                gen_op_mov_reg_v(ot, rm, t1);
            } else {
                /* perform no-op store cycle like physical cpu; must be
                   before changing accumulator to ensure idempotency if
                   the store faults and the instruction is restarted */
                gen_op_st_v(s, ot, t0, a0);
                gen_op_mov_reg_v(ot, R_EAX, t0);
                tcg_gen_br(label2);
                gen_set_label(label1);
                gen_op_st_v(s, ot, t1, a0);
            }
            gen_set_label(label2);
            tcg_gen_mov_tl(cpu_cc_src, t0);
            tcg_gen_mov_tl(cpu_cc_srcT, t2);
            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
            set_cc_op(s, CC_OP_SUBB + ot);
            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
            tcg_temp_free(a0);
        }
        break;
    case 0x1c7: /* cmpxchg8b */
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
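        /* Only /1 with a memory operand is defined (cmpxchg8b/16b);
           mod == 3 or any other reg field is an illegal encoding.  */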
        if ((mod == 3) || ((modrm & 0x38) != 0x8))
            goto illegal_op;
#ifdef TARGET_X86_64
        if (dflag == MO_64) {
            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
        } else
#endif
        {
            if (!(s->cpuid_features & CPUID_CX8))
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
        }
        set_cc_op(s, CC_OP_EFLAGS);
        break;

        /**************************/
        /* push/pop */
    case 0x50 ... 0x57: /* push */
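        /* MO_32 appears to be sufficient here: gen_op_mov_v_reg copies the
           whole register for anything wider than MO_8, and gen_push_v
           stores only the bytes implied by the actual push size.  */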
        gen_op_mov_v_reg(MO_32, cpu_T0, (b & 7) | REX_B(s));
        gen_push_v(s, cpu_T0);
        break;
    case 0x58 ... 0x5f: /* pop */
        ot = gen_pop_T0(s);
        /* NOTE: order is important for pop %sp */
        gen_pop_update(s, ot);
        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T0);
        break;
    case 0x60: /* pusha */
        if (CODE64(s))
            goto illegal_op;
        gen_pusha(s);
        break;
    case 0x61: /* popa */
        if (CODE64(s))
            goto illegal_op;
        gen_popa(s);
        break;
    case 0x68: /* push Iv */
    case 0x6a:
        ot = mo_pushpop(s, dflag);
        if (b == 0x68)
            val = insn_get(env, s, ot);
        else
            val = (int8_t)insn_get(env, s, MO_8);
        tcg_gen_movi_tl(cpu_T0, val);
        gen_push_v(s, cpu_T0);
        break;
    case 0x8f: /* pop Ev */
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        ot = gen_pop_T0(s);
        if (mod == 3) {
            /* NOTE: order is important for pop %sp */
            gen_pop_update(s, ot);
            rm = (modrm & 7) | REX_B(s);
            gen_op_mov_reg_v(ot, rm, cpu_T0);
        } else {
            /* NOTE: order is important too for MMU exceptions */
            s->popl_esp_hack = 1 << ot;
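            /* popl_esp_hack compensates ESP-relative address computation:
               architecturally, POP m computes the destination address
               after ESP has already been incremented.  */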
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            s->popl_esp_hack = 0;
            gen_pop_update(s, ot);
        }
        break;
    case 0xc8: /* enter */
        {
            int level;
            val = cpu_lduw_code(env, s->pc);
            s->pc += 2;
            level = cpu_ldub_code(env, s->pc++);
            gen_enter(s, val, level);
        }
        break;
    case 0xc9: /* leave */
        gen_leave(s);
        break;
    case 0x06: /* push es */
    case 0x0e: /* push cs */
    case 0x16: /* push ss */
    case 0x1e: /* push ds */
        if (CODE64(s))
            goto illegal_op;
        gen_op_movl_T0_seg(b >> 3);
        gen_push_v(s, cpu_T0);
        break;
    case 0x1a0: /* push fs */
    case 0x1a8: /* push gs */
        gen_op_movl_T0_seg((b >> 3) & 7);
        gen_push_v(s, cpu_T0);
        break;
    case 0x07: /* pop es */
    case 0x17: /* pop ss */
    case 0x1f: /* pop ds */
        if (CODE64(s))
            goto illegal_op;
        reg = b >> 3;
        ot = gen_pop_T0(s);
        gen_movl_seg_T0(s, reg);
        gen_pop_update(s, ot);
        /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
        if (s->is_jmp) {
            gen_jmp_im(s->pc - s->cs_base);
            if (reg == R_SS) {
                s->tf = 0;
                gen_eob_inhibit_irq(s, true);
            } else {
                gen_eob(s);
            }
        }
        break;
    case 0x1a1: /* pop fs */