qemu/target-i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "tcg-op.h"
#include "exec/cpu_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"


#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

//#define MACRO_TEST   1

/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_A0;
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
/* local temps */
static TCGv cpu_T[2];
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
};

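/* Switch the CC_OP tracked at translation time to OP, discarding the
   flag-computation globals that the new op no longer uses so that TCG
   can treat them as dead.  */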
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

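/* Write the translation-time CC_OP value back to the cpu_cc_op global
   if it is not already up to date.  */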
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

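/* Store T0 into register REG with operand size OT, handling the
   AH/CH/DH/BH encoding special case for byte operands.  */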
static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

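/* Load register REG into T0.  Byte operands honor the xH encoding;
   wider operands copy the full register.  */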
static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(reg)) {
        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
        tcg_gen_ext8u_tl(t0, t0);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static inline void gen_op_movl_A0_reg(int reg)
{
    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
}

static inline void gen_op_addl_A0_im(int32_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif

static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
    if (CODE64(s))
        gen_op_addq_A0_im(val);
    else
#endif
        gen_op_addl_A0_im(val);
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
    /* For x86_64, this sets the higher half of register to zero.
       For i386, this is equivalent to a nop. */
    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
}

static inline void gen_op_movl_A0_seg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_seg(DisasContext *s, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
    } else {
        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
    }
#else
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base));
}

static inline void gen_op_addq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}

static inline void gen_op_movq_A0_reg(int reg)
{
    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
}

static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}
#endif

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_reg_v(idx, d, cpu_T[0]);
    }
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}

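/* Compute into A0 the address of the string source operand, (seg:)ESI,
   using DS unless an explicit segment override is in effect.  */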
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    int override;

    override = s->override;
    switch (s->aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (override >= 0) {
            gen_op_movq_A0_seg(override);
            gen_op_addq_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movq_A0_reg(R_ESI);
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (s->addseg && override < 0)
            override = R_DS;
        if (override >= 0) {
            gen_op_movl_A0_seg(override);
            gen_op_addl_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movl_A0_reg(R_ESI);
        }
        break;
    case MO_16:
        /* 16 bit address, always override */
        if (override < 0)
            override = R_DS;
        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESI]);
        gen_op_addl_A0_seg(s, override);
        break;
    default:
        tcg_abort();
    }
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    switch (s->aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        gen_op_movq_A0_reg(R_EDI);
        break;
#endif
    case MO_32:
        if (s->addseg) {
            gen_op_movl_A0_seg(R_ES);
            gen_op_addl_A0_reg_sN(0, R_EDI);
        } else {
            gen_op_movl_A0_reg(R_EDI);
        }
        break;
    case MO_16:
        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_EDI]);
        gen_op_addl_A0_seg(s, R_ES);
        break;
    default:
        tcg_abort();
    }
}

static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
{
    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
};

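/* Sign- or zero-extend SRC into DST according to SIZE, returning DST;
   when no extension is needed for the target width, SRC is returned
   unchanged.  */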
static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline void gen_op_jnz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(TCGMemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

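/* Generate the checks needed before an IN/OUT whose port is in T0:
   the I/O permission check when CPL > IOPL or in vm86 mode, and the
   SVM I/O intercept check.  */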
static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if(s->flags & HF_SVMI_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

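/* Emit one MOVS iteration: copy an OT-sized element from the source
   string at (seg:)ESI to ES:EDI, then advance both index registers by
   the direction flag.  */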
static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

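/* Description of a condition as a TCG comparison: AND the value in REG
   with MASK, then compare it against IMM (or REG2 when use_reg2 is set)
   using COND.  If no_setcond is set, REG already holds the 0/1 result,
   possibly needing inversion.  */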
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

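/* Set REG to the 0/1 value of the condition encoded by jump opcode
   value 'b'.  */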
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
        cc.reg = cpu_T[0];
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
        cc.reg = cpu_T[0];
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

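/* Emit one STOS iteration: store the OT-sized low part of EAX at
   ES:EDI and advance EDI by the direction flag.  */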
static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

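/* If I/O breakpoints are enabled, call the helper that checks whether
   the access to port T_PORT triggers one.  */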
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}


static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T[0], 0);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_start();
    }
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);

    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_bpt_io(s, cpu_tmp2_i32, ot);
    if (s->tb->cflags & CF_USE_ICOUNT) {
        gen_io_end();
    }
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

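/* Dispatch one of the eight x87 arithmetic/compare ops on ST0 and FT0,
   selected by OP.  Both 2 (FCOM) and 3 (FCOMP) map to the compare
   helper; the extra pop for FCOMP is emitted by the caller.  */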
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T[0], d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T[0], d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
    }
    gen_compute_eflags_c(s1, cpu_cc_src);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
        set_cc_op(s1, CC_OP_INCB + ot);
    } else {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
        set_cc_op(s1, CC_OP_DECB + ot);
    }
    gen_op_st_rm_T0_A0(s1, ot, d);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

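/* Update the flags after a shift by a variable count: RESULT is the
   shifted value and SHM1 the value shifted by COUNT-1.  If COUNT is
   zero, the flags (and CC_OP) must be left unchanged, hence the
   conditional stores.  */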
static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T[0], op1);
    }

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T[0]);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            gen_extu(ot, cpu_T[0]);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T[0], op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T[0]);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
            } else {
                gen_extu(ot, cpu_T[0]);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

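/* Rotate by the variable count in T1.  8- and 16-bit inputs are first
   replicated so that a 32-bit rotate produces the correct result.  */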
static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T[0], op1);
    }

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
        tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        cpu_tmp2_i32, cpu_tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

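/* Rotate by an immediate count.  32/64-bit operands use the native
   rotate ops; 8/16-bit operands are rotated with a shift pair and OR.  */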
static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T[0], op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
            if (is_right) {
                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
            } else {
                tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, cpu_T[0]);
            tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}

/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                           int is_right)
{
    gen_compute_eflags(s);
    assert(s->cc_op == CC_OP_EFLAGS);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T[0], op1);

    if (is_right) {
        switch (ot) {
        case MO_8:
            gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
        case MO_16:
            gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
        case MO_32:
            gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
#endif
        default:
            tcg_abort();
        }
    } else {
        switch (ot) {
        case MO_8:
            gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
        case MO_16:
            gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
        case MO_32:
            gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
            break;
#endif
        default:
            tcg_abort();
        }
    }
    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);
}

1737/* XXX: add faster immediate case */
1738static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1739                             bool is_right, TCGv count_in)
1740{
1741    target_ulong mask = (ot == MO_64 ? 63 : 31);
1742    TCGv count;
1743
1744    /* load */
1745    if (op1 == OR_TMP0) {
1746        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1747    } else {
1748        gen_op_mov_v_reg(ot, cpu_T[0], op1);
1749    }
1750
1751    count = tcg_temp_new();
1752    tcg_gen_andi_tl(count, count_in, mask);
1753
1754    switch (ot) {
1755    case MO_16:
1756        /* Note: we implement the Intel behaviour for shift count > 16.
1757           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1758           portion by constructing it as a 32-bit value.  */
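            /* E.g. for "shrdw 20, B, A": the count is masked to 31, not 15,
               so bits of the rebuilt A:B:A value beyond the low word still
               shift in; counts larger than the operand size are otherwise
               architecturally undefined.  */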
1759        if (is_right) {
1760            tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
1761            tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
1762            tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
1763        } else {
1764            tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
1765        }
1766        /* FALLTHRU */
1767#ifdef TARGET_X86_64
1768    case MO_32:
1769        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1770        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1771        if (is_right) {
1772            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
1773            tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
1774            tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
1775        } else {
1776            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
1777            tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
1778            tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
1779            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1780            tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
1781        }
1782        break;
1783#endif
1784    default:
1785        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1786        if (is_right) {
1787            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1788
1789            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1790            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
1791            tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
1792        } else {
1793            tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1794            if (ot == MO_16) {
1795                /* Only needed if count > 16, for Intel behaviour.  */
1796                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1797                tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
1798                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1799            }
1800
1801            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1802            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
1803            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
1804        }
1805        tcg_gen_movi_tl(cpu_tmp4, 0);
1806        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
1807                           cpu_tmp4, cpu_T[1]);
1808        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1809        break;
1810    }
1811
1812    /* store */
1813    gen_op_st_rm_T0_A0(s, ot, op1);
1814
1815    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
1816    tcg_temp_free(count);
1817}
1818
1819static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1820{
1821    if (s != OR_TMP1)
1822        gen_op_mov_v_reg(ot, cpu_T[1], s);
1823    switch (op) {
1824    case OP_ROL:
1825        gen_rot_rm_T1(s1, ot, d, 0);
1826        break;
1827    case OP_ROR:
1828        gen_rot_rm_T1(s1, ot, d, 1);
1829        break;
1830    case OP_SHL:
1831    case OP_SHL1:
1832        gen_shift_rm_T1(s1, ot, d, 0, 0);
1833        break;
1834    case OP_SHR:
1835        gen_shift_rm_T1(s1, ot, d, 1, 0);
1836        break;
1837    case OP_SAR:
1838        gen_shift_rm_T1(s1, ot, d, 1, 1);
1839        break;
1840    case OP_RCL:
1841        gen_rotc_rm_T1(s1, ot, d, 0);
1842        break;
1843    case OP_RCR:
1844        gen_rotc_rm_T1(s1, ot, d, 1);
1845        break;
1846    }
1847}
1848
1849static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1850{
1851    switch (op) {
1852    case OP_ROL:
1853        gen_rot_rm_im(s1, ot, d, c, 0);
1854        break;
1855    case OP_ROR:
1856        gen_rot_rm_im(s1, ot, d, c, 1);
1857        break;
1858    case OP_SHL:
1859    case OP_SHL1:
1860        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1861        break;
1862    case OP_SHR:
1863        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1864        break;
1865    case OP_SAR:
1866        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1867        break;
1868    default:
1869        /* currently not optimized */
1870        tcg_gen_movi_tl(cpu_T[1], c);
1871        gen_shift(s1, op, ot, d, OR_TMP1);
1872        break;
1873    }
1874}
1875
1876static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1877{
1878    target_long disp;
1879    int havesib;
1880    int base;
1881    int index;
1882    int scale;
1883    int mod, rm, code, override, must_add_seg;
1884    TCGv sum;
1885
1886    override = s->override;
1887    must_add_seg = s->addseg;
1888    if (override >= 0)
1889        must_add_seg = 1;
1890    mod = (modrm >> 6) & 3;
1891    rm = modrm & 7;
1892
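        /* Informal decode reminder: modrm is mod[7:6] reg[5:3] rm[2:0];
           a SIB byte is scale[7:6] index[5:3] base[2:0].  E.g. the bytes
           "8b 44 9e 08" decode to mov eax, [esi + ebx*4 + 8]:
           mod=1 (disp8 follows), rm=4 (SIB), scale=2, index=ebx, base=esi.  */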
1893    switch (s->aflag) {
1894    case MO_64:
1895    case MO_32:
1896        havesib = 0;
1897        base = rm;
1898        index = -1;
1899        scale = 0;
1900
1901        if (base == 4) {
1902            havesib = 1;
1903            code = cpu_ldub_code(env, s->pc++);
1904            scale = (code >> 6) & 3;
1905            index = ((code >> 3) & 7) | REX_X(s);
1906            if (index == 4) {
1907                index = -1;  /* no index */
1908            }
1909            base = (code & 7);
1910        }
1911        base |= REX_B(s);
1912
1913        switch (mod) {
1914        case 0:
1915            if ((base & 7) == 5) {
1916                base = -1;
1917                disp = (int32_t)cpu_ldl_code(env, s->pc);
1918                s->pc += 4;
1919                if (CODE64(s) && !havesib) {
1920                    disp += s->pc + s->rip_offset;
1921                }
1922            } else {
1923                disp = 0;
1924            }
1925            break;
1926        case 1:
1927            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1928            break;
1929        default:
1930        case 2:
1931            disp = (int32_t)cpu_ldl_code(env, s->pc);
1932            s->pc += 4;
1933            break;
1934        }
1935
1936        /* For correct popl handling with esp.  */
1937        if (base == R_ESP && s->popl_esp_hack) {
1938            disp += s->popl_esp_hack;
1939        }
1940
1941        /* Compute the address, with a minimum number of TCG ops.  */
1942        TCGV_UNUSED(sum);
1943        if (index >= 0) {
1944            if (scale == 0) {
1945                sum = cpu_regs[index];
1946            } else {
1947                tcg_gen_shli_tl(cpu_A0, cpu_regs[index], scale);
1948                sum = cpu_A0;
1949            }
1950            if (base >= 0) {
1951                tcg_gen_add_tl(cpu_A0, sum, cpu_regs[base]);
1952                sum = cpu_A0;
1953            }
1954        } else if (base >= 0) {
1955            sum = cpu_regs[base];
1956        }
1957        if (TCGV_IS_UNUSED(sum)) {
1958            tcg_gen_movi_tl(cpu_A0, disp);
1959        } else {
1960            tcg_gen_addi_tl(cpu_A0, sum, disp);
1961        }
1962
1963        if (must_add_seg) {
1964            if (override < 0) {
1965                if (base == R_EBP || base == R_ESP) {
1966                    override = R_SS;
1967                } else {
1968                    override = R_DS;
1969                }
1970            }
1971
1972            tcg_gen_ld_tl(cpu_tmp0, cpu_env,
1973                          offsetof(CPUX86State, segs[override].base));
1974            if (CODE64(s)) {
1975                if (s->aflag == MO_32) {
1976                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
1977                }
1978                tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
1979                return;
1980            }
1981
1982            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
1983        }
1984
1985        if (s->aflag == MO_32) {
1986            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
1987        }
1988        break;
1989
1990    case MO_16:
1991        switch (mod) {
1992        case 0:
1993            if (rm == 6) {
1994                disp = cpu_lduw_code(env, s->pc);
1995                s->pc += 2;
1996                tcg_gen_movi_tl(cpu_A0, disp);
1997                rm = 0; /* avoid SS override */
1998                goto no_rm;
1999            } else {
2000                disp = 0;
2001            }
2002            break;
2003        case 1:
2004            disp = (int8_t)cpu_ldub_code(env, s->pc++);
2005            break;
2006        default:
2007        case 2:
2008            disp = (int16_t)cpu_lduw_code(env, s->pc);
2009            s->pc += 2;
2010            break;
2011        }
2012
2013        sum = cpu_A0;
2014        switch (rm) {
2015        case 0:
2016            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_ESI]);
2017            break;
2018        case 1:
2019            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_EDI]);
2020            break;
2021        case 2:
2022            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_ESI]);
2023            break;
2024        case 3:
2025            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_EDI]);
2026            break;
2027        case 4:
2028            sum = cpu_regs[R_ESI];
2029            break;
2030        case 5:
2031            sum = cpu_regs[R_EDI];
2032            break;
2033        case 6:
2034            sum = cpu_regs[R_EBP];
2035            break;
2036        default:
2037        case 7:
2038            sum = cpu_regs[R_EBX];
2039            break;
2040        }
2041        tcg_gen_addi_tl(cpu_A0, sum, disp);
2042        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2043    no_rm:
2044        if (must_add_seg) {
2045            if (override < 0) {
2046                if (rm == 2 || rm == 3 || rm == 6) {
2047                    override = R_SS;
2048                } else {
2049                    override = R_DS;
2050                }
2051            }
2052            gen_op_addl_A0_seg(s, override);
2053        }
2054        break;
2055
2056    default:
2057        tcg_abort();
2058    }
2059}
2060
2061static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2062{
2063    int mod, rm, base, code;
2064
2065    mod = (modrm >> 6) & 3;
2066    if (mod == 3)
2067        return;
2068    rm = modrm & 7;
2069
2070    switch (s->aflag) {
2071    case MO_64:
2072    case MO_32:
2073        base = rm;
2074
2075        if (base == 4) {
2076            code = cpu_ldub_code(env, s->pc++);
2077            base = (code & 7);
2078        }
2079
2080        switch (mod) {
2081        case 0:
2082            if (base == 5) {
2083                s->pc += 4;
2084            }
2085            break;
2086        case 1:
2087            s->pc++;
2088            break;
2089        default:
2090        case 2:
2091            s->pc += 4;
2092            break;
2093        }
2094        break;
2095
2096    case MO_16:
2097        switch (mod) {
2098        case 0:
2099            if (rm == 6) {
2100                s->pc += 2;
2101            }
2102            break;
2103        case 1:
2104            s->pc++;
2105            break;
2106        default:
2107        case 2:
2108            s->pc += 2;
2109            break;
2110        }
2111        break;
2112
2113    default:
2114        tcg_abort();
2115    }
2116}
2117
2118/* used for LEA and MOV AX, mem */
2119static void gen_add_A0_ds_seg(DisasContext *s)
2120{
2121    int override, must_add_seg;
2122    must_add_seg = s->addseg;
2123    override = R_DS;
2124    if (s->override >= 0) {
2125        override = s->override;
2126        must_add_seg = 1;
2127    }
2128    if (must_add_seg) {
2129#ifdef TARGET_X86_64
2130        if (CODE64(s)) {
2131            gen_op_addq_A0_seg(override);
2132        } else
2133#endif
2134        {
2135            gen_op_addl_A0_seg(s, override);
2136        }
2137    }
2138}
2139
2140/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2141   OR_TMP0 */
2142static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2143                           TCGMemOp ot, int reg, int is_store)
2144{
2145    int mod, rm;
2146
2147    mod = (modrm >> 6) & 3;
2148    rm = (modrm & 7) | REX_B(s);
2149    if (mod == 3) {
2150        if (is_store) {
2151            if (reg != OR_TMP0)
2152                gen_op_mov_v_reg(ot, cpu_T[0], reg);
2153            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
2154        } else {
2155            gen_op_mov_v_reg(ot, cpu_T[0], rm);
2156            if (reg != OR_TMP0)
2157                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2158        }
2159    } else {
2160        gen_lea_modrm(env, s, modrm);
2161        if (is_store) {
2162            if (reg != OR_TMP0)
2163                gen_op_mov_v_reg(ot, cpu_T[0], reg);
2164            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2165        } else {
2166            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
2167            if (reg != OR_TMP0)
2168                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2169        }
2170    }
2171}
2172
2173static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2174{
2175    uint32_t ret;
2176
2177    switch (ot) {
2178    case MO_8:
2179        ret = cpu_ldub_code(env, s->pc);
2180        s->pc++;
2181        break;
2182    case MO_16:
2183        ret = cpu_lduw_code(env, s->pc);
2184        s->pc += 2;
2185        break;
2186    case MO_32:
2187#ifdef TARGET_X86_64
2188    case MO_64:
2189#endif
2190        ret = cpu_ldl_code(env, s->pc);
2191        s->pc += 4;
2192        break;
2193    default:
2194        tcg_abort();
2195    }
2196    return ret;
2197}
2198
2199static inline int insn_const_size(TCGMemOp ot)
2200{
2201    if (ot <= MO_32) {
2202        return 1 << ot;
2203    } else {
2204        return 4;
2205    }
2206}
2207
2208static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2209{
2210    TranslationBlock *tb;
2211    target_ulong pc;
2212
2213    pc = s->cs_base + eip;
2214    tb = s->tb;
2215    /* NOTE: we handle the case where the TB spans two pages here */
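        /* Rationale (informal): a chained link stays valid only while the
           guest mapping of the target is unchanged, which can only be
           guaranteed for the page(s) this TB itself covers; for any other
           page we exit to the main loop instead.  */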
2216    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
2217        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
2218        /* jump to same page: we can use a direct jump */
2219        tcg_gen_goto_tb(tb_num);
2220        gen_jmp_im(eip);
2221        tcg_gen_exit_tb((uintptr_t)tb + tb_num);
2222    } else {
2223        /* jump to another page: currently not optimized */
2224        gen_jmp_im(eip);
2225        gen_eob(s);
2226    }
2227}
2228
2229static inline void gen_jcc(DisasContext *s, int b,
2230                           target_ulong val, target_ulong next_eip)
2231{
2232    TCGLabel *l1, *l2;
2233
2234    if (s->jmp_opt) {
2235        l1 = gen_new_label();
2236        gen_jcc1(s, b, l1);
2237
2238        gen_goto_tb(s, 0, next_eip);
2239
2240        gen_set_label(l1);
2241        gen_goto_tb(s, 1, val);
2242        s->is_jmp = DISAS_TB_JUMP;
2243    } else {
2244        l1 = gen_new_label();
2245        l2 = gen_new_label();
2246        gen_jcc1(s, b, l1);
2247
2248        gen_jmp_im(next_eip);
2249        tcg_gen_br(l2);
2250
2251        gen_set_label(l1);
2252        gen_jmp_im(val);
2253        gen_set_label(l2);
2254        gen_eob(s);
2255    }
2256}
2257
2258static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2259                        int modrm, int reg)
2260{
2261    CCPrepare cc;
2262
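        /* The memory operand is always loaded, even when the condition is
           false: x86 CMOV may fault on it either way.  The movcond below
           then selects the loaded value or the old register contents.  */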
2263    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2264
2265    cc = gen_prepare_cc(s, b, cpu_T[1]);
2266    if (cc.mask != -1) {
2267        TCGv t0 = tcg_temp_new();
2268        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2269        cc.reg = t0;
2270    }
2271    if (!cc.use_reg2) {
2272        cc.reg2 = tcg_const_tl(cc.imm);
2273    }
2274
2275    tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
2276                       cpu_T[0], cpu_regs[reg]);
2277    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2278
2279    if (cc.mask != -1) {
2280        tcg_temp_free(cc.reg);
2281    }
2282    if (!cc.use_reg2) {
2283        tcg_temp_free(cc.reg2);
2284    }
2285}
2286
2287static inline void gen_op_movl_T0_seg(int seg_reg)
2288{
2289    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
2290                     offsetof(CPUX86State,segs[seg_reg].selector));
2291}
2292
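    /* Real/VM86-mode segment load: the base is simply selector << 4,
       e.g. selector 0x1234 gives base 0x12340.  */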
2293static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2294{
2295    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
2296    tcg_gen_st32_tl(cpu_T[0], cpu_env, 
2297                    offsetof(CPUX86State,segs[seg_reg].selector));
2298    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
2299    tcg_gen_st_tl(cpu_T[0], cpu_env, 
2300                  offsetof(CPUX86State,segs[seg_reg].base));
2301}
2302
2303/* Move T0 to seg_reg and determine whether the CPU state may change.
2304   Never call this function with seg_reg == R_CS.  */
2305static void gen_movl_seg_T0(DisasContext *s, int seg_reg)
2306{
2307    if (s->pe && !s->vm86) {
2308        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
2309        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2310        /* abort translation because the addseg value may change or
2311           because ss32 may change. For R_SS, translation must always
2312           stop as a special handling must be done to disable hardware
2313           interrupts for the next instruction */
2314        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2315            s->is_jmp = DISAS_TB_JUMP;
2316    } else {
2317        gen_op_movl_seg_T0_vm(seg_reg);
2318        if (seg_reg == R_SS)
2319            s->is_jmp = DISAS_TB_JUMP;
2320    }
2321}
2322
2323static inline int svm_is_rep(int prefixes)
2324{
2325    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2326}
2327
2328static inline void
2329gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2330                              uint32_t type, uint64_t param)
2331{
2332    /* no SVM activated; fast case */
2333    if (likely(!(s->flags & HF_SVMI_MASK)))
2334        return;
2335    gen_update_cc_op(s);
2336    gen_jmp_im(pc_start - s->cs_base);
2337    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2338                                         tcg_const_i64(param));
2339}
2340
2341static inline void
2342gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2343{
2344    gen_svm_check_intercept_param(s, pc_start, type, 0);
2345}
2346
2347static inline void gen_stack_update(DisasContext *s, int addend)
2348{
2349#ifdef TARGET_X86_64
2350    if (CODE64(s)) {
2351        gen_op_add_reg_im(MO_64, R_ESP, addend);
2352    } else
2353#endif
2354    if (s->ss32) {
2355        gen_op_add_reg_im(MO_32, R_ESP, addend);
2356    } else {
2357        gen_op_add_reg_im(MO_16, R_ESP, addend);
2358    }
2359}
2360
2361/* Generate a push. It depends on ss32, addseg and dflag.  */
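    /* Informally, for a 16-bit stack (ss32 == 0), e.g. "push ax": this
       computes A0 = SS.base + ((SP - 2) & 0xffff), stores AX there and
       writes the decremented offset back to SP with 16-bit width.  */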
2362static void gen_push_v(DisasContext *s, TCGv val)
2363{
2364    TCGMemOp a_ot, d_ot = mo_pushpop(s, s->dflag);
2365    int size = 1 << d_ot;
2366    TCGv new_esp = cpu_A0;
2367
2368    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2369
2370    if (CODE64(s)) {
2371        a_ot = MO_64;
2372    } else if (s->ss32) {
2373        a_ot = MO_32;
2374        if (s->addseg) {
2375            new_esp = cpu_tmp4;
2376            tcg_gen_mov_tl(new_esp, cpu_A0);
2377            gen_op_addl_A0_seg(s, R_SS);
2378        } else {
2379            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
2380        }
2381    } else {
2382        a_ot = MO_16;
2383        new_esp = cpu_tmp4;
2384        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2385        tcg_gen_mov_tl(new_esp, cpu_A0);
2386        gen_op_addl_A0_seg(s, R_SS);
2387    }
2388
2389    gen_op_st_v(s, d_ot, val, cpu_A0);
2390    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2391}
2392
2393/* A two-step pop is necessary for precise exceptions: the load below
       may fault, so ESP is only updated afterwards, via gen_pop_update().  */
2394static TCGMemOp gen_pop_T0(DisasContext *s)
2395{
2396    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2397    TCGv addr = cpu_A0;
2398
2399    if (CODE64(s)) {
2400        addr = cpu_regs[R_ESP];
2401    } else if (!s->ss32) {
2402        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESP]);
2403        gen_op_addl_A0_seg(s, R_SS);
2404    } else if (s->addseg) {
2405        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_ESP]);
2406        gen_op_addl_A0_seg(s, R_SS);
2407    } else {
2408        tcg_gen_ext32u_tl(cpu_A0, cpu_regs[R_ESP]);
2409    }
2410
2411    gen_op_ld_v(s, d_ot, cpu_T[0], addr);
2412    return d_ot;
2413}
2414
2415static void gen_pop_update(DisasContext *s, TCGMemOp ot)
2416{
2417    gen_stack_update(s, 1 << ot);
2418}
2419
2420static void gen_stack_A0(DisasContext *s)
2421{
2422    gen_op_movl_A0_reg(R_ESP);
2423    if (!s->ss32)
2424        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2425    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2426    if (s->addseg)
2427        gen_op_addl_A0_seg(s, R_SS);
2428}
2429
2430/* NOTE: wrap-around in 16-bit mode is not fully handled */
2431static void gen_pusha(DisasContext *s)
2432{
2433    int i;
2434    gen_op_movl_A0_reg(R_ESP);
2435    gen_op_addl_A0_im(-(8 << s->dflag));
2436    if (!s->ss32)
2437        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2438    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2439    if (s->addseg)
2440        gen_op_addl_A0_seg(s, R_SS);
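    /* The 7 - i index stores EDI first, at the lowest address, and EAX
       last, just below the original stack pointer, matching the order
       in which PUSHA pushes the registers.  */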
2441    for (i = 0; i < 8; i++) {
2442        gen_op_mov_v_reg(MO_32, cpu_T[0], 7 - i);
2443        gen_op_st_v(s, s->dflag, cpu_T[0], cpu_A0);
2444        gen_op_addl_A0_im(1 << s->dflag);
2445    }
2446    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2447}
2448
2449/* NOTE: wrap-around in 16-bit mode is not fully handled */
2450static void gen_popa(DisasContext *s)
2451{
2452    int i;
2453    gen_op_movl_A0_reg(R_ESP);
2454    if (!s->ss32)
2455        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2456    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2457    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 8 << s->dflag);
2458    if (s->addseg)
2459        gen_op_addl_A0_seg(s, R_SS);
2460    for (i = 0; i < 8; i++) {
2461        /* ESP is not reloaded */
2462        if (i != 3) {
2463            gen_op_ld_v(s, s->dflag, cpu_T[0], cpu_A0);
2464            gen_op_mov_reg_v(s->dflag, 7 - i, cpu_T[0]);
2465        }
2466        gen_op_addl_A0_im(1 << s->dflag);
2467    }
2468    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2469}
2470
2471static void gen_enter(DisasContext *s, int esp_addend, int level)
2472{
2473    TCGMemOp ot = mo_pushpop(s, s->dflag);
2474    int opsize = 1 << ot;
2475
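    /* ENTER imm16, imm8, informally: push (E)BP, let the helper copy
       "level" saved frame pointers when level != 0, point (E)BP at the
       new frame and lower the stack pointer by esp_addend plus the
       copied-pointer area.  */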
2476    level &= 0x1f;
2477#ifdef TARGET_X86_64
2478    if (CODE64(s)) {
2479        gen_op_movl_A0_reg(R_ESP);
2480        gen_op_addq_A0_im(-opsize);
2481        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2482
2483        /* push bp */
2484        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
2485        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2486        if (level) {
2487            /* XXX: must save state */
2488            gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
2489                                     tcg_const_i32((ot == MO_64)),
2490                                     cpu_T[1]);
2491        }
2492        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
2493        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
2494        gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[1]);
2495    } else
2496#endif
2497    {
2498        gen_op_movl_A0_reg(R_ESP);
2499        gen_op_addl_A0_im(-opsize);
2500        if (!s->ss32)
2501            tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2502        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2503        if (s->addseg)
2504            gen_op_addl_A0_seg(s, R_SS);
2505        /* push bp */
2506        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
2507        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2508        if (level) {
2509            /* XXX: must save state */
2510            gen_helper_enter_level(cpu_env, tcg_const_i32(level),
2511                                   tcg_const_i32(s->dflag - 1),
2512                                   cpu_T[1]);
2513        }
2514        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
2515        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
2516        gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2517    }
2518}
2519
2520static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2521{
2522    gen_update_cc_op(s);
2523    gen_jmp_im(cur_eip);
2524    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2525    s->is_jmp = DISAS_TB_JUMP;
2526}
2527
2528/* an interrupt is different from an exception because of the
2529   privilege checks */
2530static void gen_interrupt(DisasContext *s, int intno,
2531                          target_ulong cur_eip, target_ulong next_eip)
2532{
2533    gen_update_cc_op(s);
2534    gen_jmp_im(cur_eip);
2535    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2536                               tcg_const_i32(next_eip - cur_eip));
2537    s->is_jmp = DISAS_TB_JUMP;
2538}
2539
2540static void gen_debug(DisasContext *s, target_ulong cur_eip)
2541{
2542    gen_update_cc_op(s);
2543    gen_jmp_im(cur_eip);
2544    gen_helper_debug(cpu_env);
2545    s->is_jmp = DISAS_TB_JUMP;
2546}
2547
2548/* Generate a generic end of block.  A trace exception is also
2549   generated if needed.  */
2550static void gen_eob(DisasContext *s)
2551{
2552    gen_update_cc_op(s);
2553    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
2554        gen_helper_reset_inhibit_irq(cpu_env);
2555    }
2556    if (s->tb->flags & HF_RF_MASK) {
2557        gen_helper_reset_rf(cpu_env);
2558    }
2559    if (s->singlestep_enabled) {
2560        gen_helper_debug(cpu_env);
2561    } else if (s->tf) {
2562        gen_helper_single_step(cpu_env);
2563    } else {
2564        tcg_gen_exit_tb(0);
2565    }
2566    s->is_jmp = DISAS_TB_JUMP;
2567}
2568
2569/* Generate a jump to eip.  No segment change may happen before this,
2570   since a direct jump to the next block may occur.  */
2571static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2572{
2573    gen_update_cc_op(s);
2574    set_cc_op(s, CC_OP_DYNAMIC);
2575    if (s->jmp_opt) {
2576        gen_goto_tb(s, tb_num, eip);
2577        s->is_jmp = DISAS_TB_JUMP;
2578    } else {
2579        gen_jmp_im(eip);
2580        gen_eob(s);
2581    }
2582}
2583
2584static void gen_jmp(DisasContext *s, target_ulong eip)
2585{
2586    gen_jmp_tb(s, eip, 0);
2587}
2588
2589static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2590{
2591    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2592    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2593}
2594
2595static inline void gen_stq_env_A0(DisasContext *s, int offset)
2596{
2597    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2598    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2599}
2600
2601static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2602{
2603    int mem_index = s->mem_index;
2604    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2605    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
2606    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2607    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2608    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
2609}
2610
2611static inline void gen_sto_env_A0(DisasContext *s, int offset)
2612{
2613    int mem_index = s->mem_index;
2614    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
2615    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2616    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2617    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
2618    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2619}
2620
2621static inline void gen_op_movo(int d_offset, int s_offset)
2622{
2623    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(0)));
2624    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(0)));
2625    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + offsetof(XMMReg, XMM_Q(1)));
2626    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + offsetof(XMMReg, XMM_Q(1)));
2627}
2628
2629static inline void gen_op_movq(int d_offset, int s_offset)
2630{
2631    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2632    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2633}
2634
2635static inline void gen_op_movl(int d_offset, int s_offset)
2636{
2637    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2638    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2639}
2640
2641static inline void gen_op_movq_env_0(int d_offset)
2642{
2643    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2644    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2645}
2646
2647typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2648typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2649typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2650typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2651typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2652typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2653                               TCGv_i32 val);
2654typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2655typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2656                               TCGv val);
2657
2658#define SSE_SPECIAL ((void *)1)
2659#define SSE_DUMMY ((void *)2)
2660
2661#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2662#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2663                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2664
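/* sse_op_table1 is indexed by the opcode byte (after 0x0F) and then by
   the mandatory prefix: [op][0] = none, [op][1] = 0x66, [op][2] = 0xF3,
   [op][3] = 0xF2 (this matches the b1 computation in gen_sse below).
   E.g. row 0x58 maps to addps/addpd/addss/addsd.  */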
2665static const SSEFunc_0_epp sse_op_table1[256][4] = {
2666    /* 3DNow! extensions */
2667    [0x0e] = { SSE_DUMMY }, /* femms */
2668    [0x0f] = { SSE_DUMMY }, /* pf... */
2669    /* pure SSE operations */
2670    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2671    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2672    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2673    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2674    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2675    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2676    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2677    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2678
2679    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2680    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2681    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2682    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2683    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2684    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2685    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2686    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2687    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2688    [0x51] = SSE_FOP(sqrt),
2689    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2690    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2691    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2692    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2693    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2694    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2695    [0x58] = SSE_FOP(add),
2696    [0x59] = SSE_FOP(mul),
2697    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2698               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2699    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2700    [0x5c] = SSE_FOP(sub),
2701    [0x5d] = SSE_FOP(min),
2702    [0x5e] = SSE_FOP(div),
2703    [0x5f] = SSE_FOP(max),
2704
2705    [0xc2] = SSE_FOP(cmpeq),
2706    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2707               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2708
2709    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2710    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2711    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2712
2713    /* MMX ops and their SSE extensions */
2714    [0x60] = MMX_OP2(punpcklbw),
2715    [0x61] = MMX_OP2(punpcklwd),
2716    [0x62] = MMX_OP2(punpckldq),
2717    [0x63] = MMX_OP2(packsswb),
2718    [0x64] = MMX_OP2(pcmpgtb),
2719    [0x65] = MMX_OP2(pcmpgtw),
2720    [0x66] = MMX_OP2(pcmpgtl),
2721    [0x67] = MMX_OP2(packuswb),
2722    [0x68] = MMX_OP2(punpckhbw),
2723    [0x69] = MMX_OP2(punpckhwd),
2724    [0x6a] = MMX_OP2(punpckhdq),
2725    [0x6b] = MMX_OP2(packssdw),
2726    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2727    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2728    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2729    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2730    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2731               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2732               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2733               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2734    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2735    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2736    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2737    [0x74] = MMX_OP2(pcmpeqb),
2738    [0x75] = MMX_OP2(pcmpeqw),
2739    [0x76] = MMX_OP2(pcmpeql),
2740    [0x77] = { SSE_DUMMY }, /* emms */
2741    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2742    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2743    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2744    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2745    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2746    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2747    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2748    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2749    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2750    [0xd1] = MMX_OP2(psrlw),
2751    [0xd2] = MMX_OP2(psrld),
2752    [0xd3] = MMX_OP2(psrlq),
2753    [0xd4] = MMX_OP2(paddq),
2754    [0xd5] = MMX_OP2(pmullw),
2755    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2756    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2757    [0xd8] = MMX_OP2(psubusb),
2758    [0xd9] = MMX_OP2(psubusw),
2759    [0xda] = MMX_OP2(pminub),
2760    [0xdb] = MMX_OP2(pand),
2761    [0xdc] = MMX_OP2(paddusb),
2762    [0xdd] = MMX_OP2(paddusw),
2763    [0xde] = MMX_OP2(pmaxub),
2764    [0xdf] = MMX_OP2(pandn),
2765    [0xe0] = MMX_OP2(pavgb),
2766    [0xe1] = MMX_OP2(psraw),
2767    [0xe2] = MMX_OP2(psrad),
2768    [0xe3] = MMX_OP2(pavgw),
2769    [0xe4] = MMX_OP2(pmulhuw),
2770    [0xe5] = MMX_OP2(pmulhw),
2771    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2772    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2773    [0xe8] = MMX_OP2(psubsb),
2774    [0xe9] = MMX_OP2(psubsw),
2775    [0xea] = MMX_OP2(pminsw),
2776    [0xeb] = MMX_OP2(por),
2777    [0xec] = MMX_OP2(paddsb),
2778    [0xed] = MMX_OP2(paddsw),
2779    [0xee] = MMX_OP2(pmaxsw),
2780    [0xef] = MMX_OP2(pxor),
2781    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2782    [0xf1] = MMX_OP2(psllw),
2783    [0xf2] = MMX_OP2(pslld),
2784    [0xf3] = MMX_OP2(psllq),
2785    [0xf4] = MMX_OP2(pmuludq),
2786    [0xf5] = MMX_OP2(pmaddwd),
2787    [0xf6] = MMX_OP2(psadbw),
2788    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2789               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2790    [0xf8] = MMX_OP2(psubb),
2791    [0xf9] = MMX_OP2(psubw),
2792    [0xfa] = MMX_OP2(psubl),
2793    [0xfb] = MMX_OP2(psubq),
2794    [0xfc] = MMX_OP2(paddb),
2795    [0xfd] = MMX_OP2(paddw),
2796    [0xfe] = MMX_OP2(paddl),
2797};
2798
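/* sse_op_table2 covers the 0F 71/72/73 shift-by-immediate groups:
   rows 0-7, 8-15 and 16-23 are 0F 71 (words), 0F 72 (dwords) and
   0F 73 (qwords), indexed by the modrm reg field, each with the usual
   MMX/XMM pair.  E.g. [0 + 2] is psrlw, i.e. 0F 71 /2.  */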
2799static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2800    [0 + 2] = MMX_OP2(psrlw),
2801    [0 + 4] = MMX_OP2(psraw),
2802    [0 + 6] = MMX_OP2(psllw),
2803    [8 + 2] = MMX_OP2(psrld),
2804    [8 + 4] = MMX_OP2(psrad),
2805    [8 + 6] = MMX_OP2(pslld),
2806    [16 + 2] = MMX_OP2(psrlq),
2807    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2808    [16 + 6] = MMX_OP2(psllq),
2809    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2810};
2811
2812static const SSEFunc_0_epi sse_op_table3ai[] = {
2813    gen_helper_cvtsi2ss,
2814    gen_helper_cvtsi2sd
2815};
2816
2817#ifdef TARGET_X86_64
2818static const SSEFunc_0_epl sse_op_table3aq[] = {
2819    gen_helper_cvtsq2ss,
2820    gen_helper_cvtsq2sd
2821};
2822#endif
2823
2824static const SSEFunc_i_ep sse_op_table3bi[] = {
2825    gen_helper_cvttss2si,
2826    gen_helper_cvtss2si,
2827    gen_helper_cvttsd2si,
2828    gen_helper_cvtsd2si
2829};
2830
2831#ifdef TARGET_X86_64
2832static const SSEFunc_l_ep sse_op_table3bq[] = {
2833    gen_helper_cvttss2sq,
2834    gen_helper_cvtss2sq,
2835    gen_helper_cvttsd2sq,
2836    gen_helper_cvtsd2sq
2837};
2838#endif
2839
2840static const SSEFunc_0_epp sse_op_table4[8][4] = {
2841    SSE_FOP(cmpeq),
2842    SSE_FOP(cmplt),
2843    SSE_FOP(cmple),
2844    SSE_FOP(cmpunord),
2845    SSE_FOP(cmpneq),
2846    SSE_FOP(cmpnlt),
2847    SSE_FOP(cmpnle),
2848    SSE_FOP(cmpord),
2849};
2850
2851static const SSEFunc_0_epp sse_op_table5[256] = {
2852    [0x0c] = gen_helper_pi2fw,
2853    [0x0d] = gen_helper_pi2fd,
2854    [0x1c] = gen_helper_pf2iw,
2855    [0x1d] = gen_helper_pf2id,
2856    [0x8a] = gen_helper_pfnacc,
2857    [0x8e] = gen_helper_pfpnacc,
2858    [0x90] = gen_helper_pfcmpge,
2859    [0x94] = gen_helper_pfmin,
2860    [0x96] = gen_helper_pfrcp,
2861    [0x97] = gen_helper_pfrsqrt,
2862    [0x9a] = gen_helper_pfsub,
2863    [0x9e] = gen_helper_pfadd,
2864    [0xa0] = gen_helper_pfcmpgt,
2865    [0xa4] = gen_helper_pfmax,
2866    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2867    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2868    [0xaa] = gen_helper_pfsubr,
2869    [0xae] = gen_helper_pfacc,
2870    [0xb0] = gen_helper_pfcmpeq,
2871    [0xb4] = gen_helper_pfmul,
2872    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2873    [0xb7] = gen_helper_pmulhrw_mmx,
2874    [0xbb] = gen_helper_pswapd,
2875    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2876};
2877
2878struct SSEOpHelper_epp {
2879    SSEFunc_0_epp op[2];
2880    uint32_t ext_mask;
2881};
2882
2883struct SSEOpHelper_eppi {
2884    SSEFunc_0_eppi op[2];
2885    uint32_t ext_mask;
2886};
2887
2888#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2889#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2890#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2891#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2892#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2893        CPUID_EXT_PCLMULQDQ }
2894#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2895
2896static const struct SSEOpHelper_epp sse_op_table6[256] = {
2897    [0x00] = SSSE3_OP(pshufb),
2898    [0x01] = SSSE3_OP(phaddw),
2899    [0x02] = SSSE3_OP(phaddd),
2900    [0x03] = SSSE3_OP(phaddsw),
2901    [0x04] = SSSE3_OP(pmaddubsw),
2902    [0x05] = SSSE3_OP(phsubw),
2903    [0x06] = SSSE3_OP(phsubd),
2904    [0x07] = SSSE3_OP(phsubsw),
2905    [0x08] = SSSE3_OP(psignb),
2906    [0x09] = SSSE3_OP(psignw),
2907    [0x0a] = SSSE3_OP(psignd),
2908    [0x0b] = SSSE3_OP(pmulhrsw),
2909    [0x10] = SSE41_OP(pblendvb),
2910    [0x14] = SSE41_OP(blendvps),
2911    [0x15] = SSE41_OP(blendvpd),
2912    [0x17] = SSE41_OP(ptest),
2913    [0x1c] = SSSE3_OP(pabsb),
2914    [0x1d] = SSSE3_OP(pabsw),
2915    [0x1e] = SSSE3_OP(pabsd),
2916    [0x20] = SSE41_OP(pmovsxbw),
2917    [0x21] = SSE41_OP(pmovsxbd),
2918    [0x22] = SSE41_OP(pmovsxbq),
2919    [0x23] = SSE41_OP(pmovsxwd),
2920    [0x24] = SSE41_OP(pmovsxwq),
2921    [0x25] = SSE41_OP(pmovsxdq),
2922    [0x28] = SSE41_OP(pmuldq),
2923    [0x29] = SSE41_OP(pcmpeqq),
2924    [0x2a] = SSE41_SPECIAL, /* movntqda */
2925    [0x2b] = SSE41_OP(packusdw),
2926    [0x30] = SSE41_OP(pmovzxbw),
2927    [0x31] = SSE41_OP(pmovzxbd),
2928    [0x32] = SSE41_OP(pmovzxbq),
2929    [0x33] = SSE41_OP(pmovzxwd),
2930    [0x34] = SSE41_OP(pmovzxwq),
2931    [0x35] = SSE41_OP(pmovzxdq),
2932    [0x37] = SSE42_OP(pcmpgtq),
2933    [0x38] = SSE41_OP(pminsb),
2934    [0x39] = SSE41_OP(pminsd),
2935    [0x3a] = SSE41_OP(pminuw),
2936    [0x3b] = SSE41_OP(pminud),
2937    [0x3c] = SSE41_OP(pmaxsb),
2938    [0x3d] = SSE41_OP(pmaxsd),
2939    [0x3e] = SSE41_OP(pmaxuw),
2940    [0x3f] = SSE41_OP(pmaxud),
2941    [0x40] = SSE41_OP(pmulld),
2942    [0x41] = SSE41_OP(phminposuw),
2943    [0xdb] = AESNI_OP(aesimc),
2944    [0xdc] = AESNI_OP(aesenc),
2945    [0xdd] = AESNI_OP(aesenclast),
2946    [0xde] = AESNI_OP(aesdec),
2947    [0xdf] = AESNI_OP(aesdeclast),
2948};
2949
2950static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2951    [0x08] = SSE41_OP(roundps),
2952    [0x09] = SSE41_OP(roundpd),
2953    [0x0a] = SSE41_OP(roundss),
2954    [0x0b] = SSE41_OP(roundsd),
2955    [0x0c] = SSE41_OP(blendps),
2956    [0x0d] = SSE41_OP(blendpd),
2957    [0x0e] = SSE41_OP(pblendw),
2958    [0x0f] = SSSE3_OP(palignr),
2959    [0x14] = SSE41_SPECIAL, /* pextrb */
2960    [0x15] = SSE41_SPECIAL, /* pextrw */
2961    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2962    [0x17] = SSE41_SPECIAL, /* extractps */
2963    [0x20] = SSE41_SPECIAL, /* pinsrb */
2964    [0x21] = SSE41_SPECIAL, /* insertps */
2965    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2966    [0x40] = SSE41_OP(dpps),
2967    [0x41] = SSE41_OP(dppd),
2968    [0x42] = SSE41_OP(mpsadbw),
2969    [0x44] = PCLMULQDQ_OP(pclmulqdq),
2970    [0x60] = SSE42_OP(pcmpestrm),
2971    [0x61] = SSE42_OP(pcmpestri),
2972    [0x62] = SSE42_OP(pcmpistrm),
2973    [0x63] = SSE42_OP(pcmpistri),
2974    [0xdf] = AESNI_OP(aeskeygenassist),
2975};
2976
2977static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2978                    target_ulong pc_start, int rex_r)
2979{
2980    int b1, op1_offset, op2_offset, is_xmm, val;
2981    int modrm, mod, rm, reg;
2982    SSEFunc_0_epp sse_fn_epp;
2983    SSEFunc_0_eppi sse_fn_eppi;
2984    SSEFunc_0_ppi sse_fn_ppi;
2985    SSEFunc_0_eppt sse_fn_eppt;
2986    TCGMemOp ot;
2987
2988    b &= 0xff;
2989    if (s->prefix & PREFIX_DATA)
2990        b1 = 1;
2991    else if (s->prefix & PREFIX_REPZ)
2992        b1 = 2;
2993    else if (s->prefix & PREFIX_REPNZ)
2994        b1 = 3;
2995    else
2996        b1 = 0;
2997    sse_fn_epp = sse_op_table1[b][b1];
2998    if (!sse_fn_epp) {
2999        goto illegal_op;
3000    }
3001    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3002        is_xmm = 1;
3003    } else {
3004        if (b1 == 0) {
3005            /* MMX case */
3006            is_xmm = 0;
3007        } else {
3008            is_xmm = 1;
3009        }
3010    }
3011    /* simple MMX/SSE operation */
3012    if (s->flags & HF_TS_MASK) {
3013        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3014        return;
3015    }
3016    if (s->flags & HF_EM_MASK) {
3017    illegal_op:
3018        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
3019        return;
3020    }
3021    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
3022        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
3023            goto illegal_op;
3024    if (b == 0x0e) {
3025        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
3026            goto illegal_op;
3027        /* femms */
3028        gen_helper_emms(cpu_env);
3029        return;
3030    }
3031    if (b == 0x77) {
3032        /* emms */
3033        gen_helper_emms(cpu_env);
3034        return;
3035    }
3036    /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3037       the static cpu state) */
3038    if (!is_xmm) {
3039        gen_helper_enter_mmx(cpu_env);
3040    }
3041
3042    modrm = cpu_ldub_code(env, s->pc++);
3043    reg = ((modrm >> 3) & 7);
3044    if (is_xmm)
3045        reg |= rex_r;
3046    mod = (modrm >> 6) & 3;
3047    if (sse_fn_epp == SSE_SPECIAL) {
3048        b |= (b1 << 8);
3049        switch (b) {
3050        case 0x0e7: /* movntq */
3051            if (mod == 3)
3052                goto illegal_op;
3053            gen_lea_modrm(env, s, modrm);
3054            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3055            break;
3056        case 0x1e7: /* movntdq */
3057        case 0x02b: /* movntps */
3058        case 0x12b: /* movntpd */
3059            if (mod == 3)
3060                goto illegal_op;
3061            gen_lea_modrm(env, s, modrm);
3062            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3063            break;
3064        case 0x3f0: /* lddqu */
3065            if (mod == 3)
3066                goto illegal_op;
3067            gen_lea_modrm(env, s, modrm);
3068            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3069            break;
3070        case 0x22b: /* movntss */
3071        case 0x32b: /* movntsd */
3072            if (mod == 3)
3073                goto illegal_op;
3074            gen_lea_modrm(env, s, modrm);
3075            if (b1 & 1) {
3076                gen_stq_env_A0(s, offsetof(CPUX86State,
3077                                           xmm_regs[reg].XMM_Q(0)));
3078            } else {
3079                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3080                    xmm_regs[reg].XMM_L(0)));
3081                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
3082            }
3083            break;
3084        case 0x6e: /* movd mm, ea */
3085#ifdef TARGET_X86_64
3086            if (s->dflag == MO_64) {
3087                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3088                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3089            } else
3090#endif
3091            {
3092                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3093                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3094                                 offsetof(CPUX86State,fpregs[reg].mmx));
3095                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3096                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3097            }
3098            break;
3099        case 0x16e: /* movd xmm, ea */
3100#ifdef TARGET_X86_64
3101            if (s->dflag == MO_64) {
3102                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3103                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3104                                 offsetof(CPUX86State,xmm_regs[reg]));
3105                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
3106            } else
3107#endif
3108            {
3109                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3110                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3111                                 offsetof(CPUX86State,xmm_regs[reg]));
3112                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3113                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3114            }
3115            break;
3116        case 0x6f: /* movq mm, ea */
3117            if (mod != 3) {
3118                gen_lea_modrm(env, s, modrm);
3119                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3120            } else {
3121                rm = (modrm & 7);
3122                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3123                               offsetof(CPUX86State,fpregs[rm].mmx));
3124                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3125                               offsetof(CPUX86State,fpregs[reg].mmx));
3126            }
3127            break;
3128        case 0x010: /* movups */
3129        case 0x110: /* movupd */
3130        case 0x028: /* movaps */
3131        case 0x128: /* movapd */
3132        case 0x16f: /* movdqa xmm, ea */
3133        case 0x26f: /* movdqu xmm, ea */
3134            if (mod != 3) {
3135                gen_lea_modrm(env, s, modrm);
3136                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3137            } else {
3138                rm = (modrm & 7) | REX_B(s);
3139                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3140                            offsetof(CPUX86State,xmm_regs[rm]));
3141            }
3142            break;
3143        case 0x210: /* movss xmm, ea */
3144            if (mod != 3) {
3145                gen_lea_modrm(env, s, modrm);
3146                gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
3147                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3148                tcg_gen_movi_tl(cpu_T[0], 0);
3149                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3150                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3151                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3152            } else {
3153                rm = (modrm & 7) | REX_B(s);
3154                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3155                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3156            }
3157            break;
3158        case 0x310: /* movsd xmm, ea */
3159            if (mod != 3) {
3160                gen_lea_modrm(env, s, modrm);
3161                gen_ldq_env_A0(s, offsetof(CPUX86State,
3162                                           xmm_regs[reg].XMM_Q(0)));
3163                tcg_gen_movi_tl(cpu_T[0], 0);
3164                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3165                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3166            } else {
3167                rm = (modrm & 7) | REX_B(s);
3168                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3169                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3170            }
3171            break;
3172        case 0x012: /* movlps */
3173        case 0x112: /* movlpd */
3174            if (mod != 3) {
3175                gen_lea_modrm(env, s, modrm);
3176                gen_ldq_env_A0(s, offsetof(CPUX86State,
3177                                           xmm_regs[reg].XMM_Q(0)));
3178            } else {
3179                /* movhlps */
3180                rm = (modrm & 7) | REX_B(s);
3181                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3182                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3183            }
3184            break;
3185        case 0x212: /* movsldup */
3186            if (mod != 3) {
3187                gen_lea_modrm(env, s, modrm);
3188                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3189            } else {
3190                rm = (modrm & 7) | REX_B(s);
3191                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3192                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3193                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3194                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
3195            }
3196            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3197                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3198            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3199                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3200            break;
3201        case 0x312: /* movddup */
3202            if (mod != 3) {
3203                gen_lea_modrm(env, s, modrm);
3204                gen_ldq_env_A0(s, offsetof(CPUX86State,
3205                                           xmm_regs[reg].XMM_Q(0)));
3206            } else {
3207                rm = (modrm & 7) | REX_B(s);
3208                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3209                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3210            }
3211            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3212                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3213            break;
3214        case 0x016: /* movhps */
3215        case 0x116: /* movhpd */
3216            if (mod != 3) {
3217                gen_lea_modrm(env, s, modrm);
3218                gen_ldq_env_A0(s, offsetof(CPUX86State,
3219                                           xmm_regs[reg].XMM_Q(1)));
3220            } else {
3221                /* movlhps */
3222                rm = (modrm & 7) | REX_B(s);
3223                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3224                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3225            }
3226            break;
3227        case 0x216: /* movshdup */
3228            if (mod != 3) {
3229                gen_lea_modrm(env, s, modrm);
3230                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3231            } else {
3232                rm = (modrm & 7) | REX_B(s);
3233                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3234                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
3235                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3236                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
3237            }
3238            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3239                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3240            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3241                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3242            break;
3243        case 0x178:
3244        case 0x378:
3245            {
3246                int bit_index, field_length;
3247
3248                if (b1 == 1 && reg != 0)
3249                    goto illegal_op;
3250                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3251                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3252                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3253                    offsetof(CPUX86State,xmm_regs[reg]));
3254                if (b1 == 1)
3255                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3256                                       tcg_const_i32(bit_index),
3257                                       tcg_const_i32(field_length));
3258                else
3259                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3260                                         tcg_const_i32(bit_index),
3261                                         tcg_const_i32(field_length));
3262            }
3263            break;
3264        case 0x7e: /* movd ea, mm */
3265#ifdef TARGET_X86_64
3266            if (s->dflag == MO_64) {
3267                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3268                               offsetof(CPUX86State,fpregs[reg].mmx));
3269                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3270            } else
3271#endif
3272            {
3273                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3274                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3275                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3276            }
3277            break;
3278        case 0x17e: /* movd ea, xmm */
3279#ifdef TARGET_X86_64
3280            if (s->dflag == MO_64) {
3281                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3282                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3283                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3284            } else
3285#endif
3286            {
3287                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3288                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3289                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3290            }
3291            break;
3292        case 0x27e: /* movq xmm, ea */
3293            if (mod != 3) {
3294                gen_lea_modrm(env, s, modrm);
3295                gen_ldq_env_A0(s, offsetof(CPUX86State,
3296                                           xmm_regs[reg].XMM_Q(0)));
3297            } else {
3298                rm = (modrm & 7) | REX_B(s);
3299                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3300                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3301            }
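                /* movq into xmm zero-extends: clear the high quadword */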
3302            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3303            break;
3304        case 0x7f: /* movq ea, mm */
3305            if (mod != 3) {
3306                gen_lea_modrm(env, s, modrm);
3307                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3308            } else {
3309                rm = (modrm & 7);
3310                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3311                            offsetof(CPUX86State,fpregs[reg].mmx));
3312            }
3313            break;
3314        case 0x011: /* movups */
3315        case 0x111: /* movupd */
3316        case 0x029: /* movaps */
3317        case 0x129: /* movapd */
3318        case 0x17f: /* movdqa ea, xmm */
3319        case 0x27f: /* movdqu ea, xmm */
3320            if (mod != 3) {
3321                gen_lea_modrm(env, s, modrm);
3322                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3323            } else {
3324                rm = (modrm & 7) | REX_B(s);
3325                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3326                            offsetof(CPUX86State,xmm_regs[reg]));
3327            }
3328            break;
3329        case 0x211: /* movss ea, xmm */
3330            if (mod != 3) {
3331                gen_lea_modrm(env, s, modrm);
3332                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3333                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
3334            } else {
3335                rm = (modrm & 7) | REX_B(s);
3336                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
3337                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3338            }
3339            break;
3340        case 0x311: /* movsd ea, xmm */
3341            if (mod != 3) {
3342                gen_lea_modrm(env, s, modrm);
3343                gen_stq_env_A0(s, offsetof(CPUX86State,
3344                                           xmm_regs[reg].XMM_Q(0)));
3345            } else {
3346                rm = (modrm & 7) | REX_B(s);
3347                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
3348                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3349            }
3350            break;
3351        case 0x013: /* movlps */
3352        case 0x113: /* movlpd */
3353            if (mod != 3) {
3354                gen_lea_modrm(env, s, modrm);
3355                gen_stq_env_A0(s, offsetof(CPUX86State,
3356                                           xmm_regs[reg].XMM_Q(0)));
3357            } else {
3358                goto illegal_op;
3359            }
3360            break;
3361        case 0x017: /* movhps */
3362        case 0x117: /* movhpd */
3363            if (mod != 3) {
3364                gen_lea_modrm(env, s, modrm);
3365                gen_stq_env_A0(s, offsetof(CPUX86State,
3366                                           xmm_regs[reg].XMM_Q(1)));
3367            } else {
3368                goto illegal_op;
3369            }
3370            break;
3371        case 0x71: /* shift mm, im */
3372        case 0x72:
3373        case 0x73:
3374        case 0x171: /* shift xmm, im */
3375        case 0x172:
3376        case 0x173:
3377            if (b1 >= 2) {
3378                goto illegal_op;
3379            }
3380            val = cpu_ldub_code(env, s->pc++);
3381            if (is_xmm) {
3382                tcg_gen_movi_tl(cpu_T[0], val);
3383                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3384                tcg_gen_movi_tl(cpu_T[0], 0);
3385                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
3386                op1_offset = offsetof(CPUX86State,xmm_t0);
3387            } else {
3388                tcg_gen_movi_tl(cpu_T[0], val);
3389                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3390                tcg_gen_movi_tl(cpu_T[0], 0);
3391                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3392                op1_offset = offsetof(CPUX86State,mmx_t0);
3393            }
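                /* Build the count as a 64-bit value in the low quadword of
                   {xmm,mmx}_t0 so the register-form shift helpers can be
                   reused; sse_op_table2 is indexed by the 0x71/0x72/0x73
                   opcode group and the modrm reg field.  */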
3394            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3395                                       ((modrm >> 3) & 7)][b1];
3396            if (!sse_fn_epp) {
3397                goto illegal_op;
3398            }
3399            if (is_xmm) {
3400                rm = (modrm & 7) | REX_B(s);
3401                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3402            } else {
3403                rm = (modrm & 7);
3404                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3405            }
3406            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3407            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3408            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3409            break;
3410        case 0x050: /* movmskps */
3411            rm = (modrm & 7) | REX_B(s);
3412            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3413                             offsetof(CPUX86State,xmm_regs[rm]));
3414            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3415            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3416            break;
3417        case 0x150: /* movmskpd */
3418            rm = (modrm & 7) | REX_B(s);
3419            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3420                             offsetof(CPUX86State,xmm_regs[rm]));
3421            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3422            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3423            break;
3424        case 0x02a: /* cvtpi2ps */
3425        case 0x12a: /* cvtpi2pd */
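                /* The source is an MMX register, so update the FPU state
                   (top of stack and tag word) as any MMX insn does.  */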
3426            gen_helper_enter_mmx(cpu_env);
3427            if (mod != 3) {
3428                gen_lea_modrm(env, s, modrm);
3429                op2_offset = offsetof(CPUX86State,mmx_t0);
3430                gen_ldq_env_A0(s, op2_offset);
3431            } else {
3432                rm = (modrm & 7);
3433                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3434            }
3435            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3436            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3437            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3438            switch(b >> 8) {
3439            case 0x0:
3440                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3441                break;
3442            default:
3443            case 0x1:
3444                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3445                break;
3446            }
3447            break;
3448        case 0x22a: /* cvtsi2ss */
3449        case 0x32a: /* cvtsi2sd */
3450            ot = mo_64_32(s->dflag);
3451            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3452            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3453            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3454            if (ot == MO_32) {
3455                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3456                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3457                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3458            } else {
3459#ifdef TARGET_X86_64
3460                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3461                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T[0]);
3462#else
3463                goto illegal_op;
3464#endif
3465            }
3466            break;
3467        case 0x02c: /* cvttps2pi */
3468        case 0x12c: /* cvttpd2pi */
3469        case 0x02d: /* cvtps2pi */
3470        case 0x12d: /* cvtpd2pi */
3471            gen_helper_enter_mmx(cpu_env);
3472            if (mod != 3) {
3473                gen_lea_modrm(env, s, modrm);
3474                op2_offset = offsetof(CPUX86State,xmm_t0);
3475                gen_ldo_env_A0(s, op2_offset);
3476            } else {
3477                rm = (modrm & 7) | REX_B(s);
3478                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3479            }
3480            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3481            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3482            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3483            switch(b) {
3484            case 0x02c:
3485                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3486                break;
3487            case 0x12c:
3488                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3489                break;
3490            case 0x02d:
3491                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3492                break;
3493            case 0x12d:
3494                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3495                break;
3496            }
3497            break;
3498        case 0x22c: /* cvttss2si */
3499        case 0x32c: /* cvttsd2si */
3500        case 0x22d: /* cvtss2si */
3501        case 0x32d: /* cvtsd2si */
3502            ot = mo_64_32(s->dflag);
3503            if (mod != 3) {
3504                gen_lea_modrm(env, s, modrm);
3505                if ((b >> 8) & 1) {
3506                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
3507                } else {
3508                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
3509                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3510                }
3511                op2_offset = offsetof(CPUX86State,xmm_t0);
3512            } else {
3513                rm = (modrm & 7) | REX_B(s);
3514                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3515            }
3516            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
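                /* Table index: bit 0 selects cvt (round) vs cvtt
                   (truncate), bit 1 selects the sd (f2) vs ss (f3) form. */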
3517            if (ot == MO_32) {
3518                SSEFunc_i_ep sse_fn_i_ep =
3519                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3520                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3521                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3522            } else {
3523#ifdef TARGET_X86_64
3524                SSEFunc_l_ep sse_fn_l_ep =
3525                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3526                sse_fn_l_ep(cpu_T[0], cpu_env, cpu_ptr0);
3527#else
3528                goto illegal_op;
3529#endif
3530            }
3531            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3532            break;
3533        case 0xc4: /* pinsrw */
3534        case 0x1c4:
3535            s->rip_offset = 1;
3536            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3537            val = cpu_ldub_code(env, s->pc++);
3538            if (b1) {
3539                val &= 7;
3540                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3541                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
3542            } else {
3543                val &= 3;
3544                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3545                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3546            }
3547            break;
3548        case 0xc5: /* pextrw */
3549        case 0x1c5:
3550            if (mod != 3)
3551                goto illegal_op;
3552            ot = mo_64_32(s->dflag);
3553            val = cpu_ldub_code(env, s->pc++);
3554            if (b1) {
3555                val &= 7;
3556                rm = (modrm & 7) | REX_B(s);
3557                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3558                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
3559            } else {
3560                val &= 3;
3561                rm = (modrm & 7);
3562                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3563                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3564            }
3565            reg = ((modrm >> 3) & 7) | rex_r;
3566            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3567            break;
3568        case 0x1d6: /* movq ea, xmm */
3569            if (mod != 3) {
3570                gen_lea_modrm(env, s, modrm);
3571                gen_stq_env_A0(s, offsetof(CPUX86State,
3572                                           xmm_regs[reg].XMM_Q(0)));
3573            } else {
3574                rm = (modrm & 7) | REX_B(s);
3575                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
3576                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3577                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3578            }
3579            break;
3580        case 0x2d6: /* movq2dq */
3581            gen_helper_enter_mmx(cpu_env);
3582            rm = (modrm & 7);
3583            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3584                        offsetof(CPUX86State,fpregs[rm].mmx));
3585            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3586            break;
3587        case 0x3d6: /* movdq2q */
3588            gen_helper_enter_mmx(cpu_env);
3589            rm = (modrm & 7) | REX_B(s);
3590            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3591                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3592            break;
3593        case 0xd7: /* pmovmskb */
3594        case 0x1d7:
3595            if (mod != 3)
3596                goto illegal_op;
3597            if (b1) {
3598                rm = (modrm & 7) | REX_B(s);
3599                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3600                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3601            } else {
3602                rm = (modrm & 7);
3603                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3604                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3605            }
3606            reg = ((modrm >> 3) & 7) | rex_r;
3607            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3608            break;
3609
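            /* Three-byte 0f 38 opcodes: the byte fetched as modrm is
               really the third opcode byte; the f0-ff forms are decoded
               separately at do_0f_38_fx.  */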
3610        case 0x138:
3611        case 0x038:
3612            b = modrm;
3613            if ((b & 0xf0) == 0xf0) {
3614                goto do_0f_38_fx;
3615            }
3616            modrm = cpu_ldub_code(env, s->pc++);
3617            rm = modrm & 7;
3618            reg = ((modrm >> 3) & 7) | rex_r;
3619            mod = (modrm >> 6) & 3;
3620            if (b1 >= 2) {
3621                goto illegal_op;
3622            }
3623
3624            sse_fn_epp = sse_op_table6[b].op[b1];
3625            if (!sse_fn_epp) {
3626                goto illegal_op;
3627            }
3628            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3629                goto illegal_op;
3630
3631            if (b1) {
3632                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3633                if (mod == 3) {
3634                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3635                } else {
3636                    op2_offset = offsetof(CPUX86State,xmm_t0);
3637                    gen_lea_modrm(env, s, modrm);
3638                    switch (b) {
3639                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3640                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3641                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3642                        gen_ldq_env_A0(s, op2_offset +
3643                                        offsetof(XMMReg, XMM_Q(0)));
3644                        break;
3645                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3646                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3647                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3648                                            s->mem_index, MO_LEUL);
3649                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3650                                        offsetof(XMMReg, XMM_L(0)));
3651                        break;
3652                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3653                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3654                                           s->mem_index, MO_LEUW);
3655                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3656                                        offsetof(XMMReg, XMM_W(0)));
3657                        break;
3658                    case 0x2a:            /* movntdqa */
3659                        gen_ldo_env_A0(s, op1_offset);
3660                        return;
3661                    default:
3662                        gen_ldo_env_A0(s, op2_offset);
3663                    }
3664                }
3665            } else {
3666                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3667                if (mod == 3) {
3668                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3669                } else {
3670                    op2_offset = offsetof(CPUX86State,mmx_t0);
3671                    gen_lea_modrm(env, s, modrm);
3672                    gen_ldq_env_A0(s, op2_offset);
3673                }
3674            }
3675            if (sse_fn_epp == SSE_SPECIAL) {
3676                goto illegal_op;
3677            }
3678
3679            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3680            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3681            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3682
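                /* 0f 38 17 is ptest, which sets Z and C in eflags */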
3683            if (b == 0x17) {
3684                set_cc_op(s, CC_OP_EFLAGS);
3685            }
3686            break;
3687
3688        case 0x238:
3689        case 0x338:
3690        do_0f_38_fx:
3691            /* Various integer extensions at 0f 38 f[0-f].  */
3692            b = modrm | (b1 << 8);
3693            modrm = cpu_ldub_code(env, s->pc++);
3694            reg = ((modrm >> 3) & 7) | rex_r;
3695
3696            switch (b) {
3697            case 0x3f0: /* crc32 Gd,Eb */
3698            case 0x3f1: /* crc32 Gd,Ey */
3699            do_crc32:
3700                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3701                    goto illegal_op;
3702                }
3703                if ((b & 0xff) == 0xf0) {
3704                    ot = MO_8;
3705                } else if (s->dflag != MO_64) {
3706                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3707                } else {
3708                    ot = MO_64;
3709                }
3710
3711                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3712                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3713                gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
3714                                 cpu_T[0], tcg_const_i32(8 << ot));
3715
3716                ot = mo_64_32(s->dflag);
3717                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3718                break;
3719
3720            case 0x1f0: /* crc32 or movbe */
3721            case 0x1f1:
3722                /* For these insns, the f2 prefix is supposed to have
3723                   priority over the 66 prefix, but that is not how b1 was
3724                   computed above; check for f2 again here.  */
3725                if (s->prefix & PREFIX_REPNZ) {
3726                    goto do_crc32;
3727                }
3728                /* FALLTHRU */
3729            case 0x0f0: /* movbe Gy,My */
3730            case 0x0f1: /* movbe My,Gy */
3731                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3732                    goto illegal_op;
3733                }
3734                if (s->dflag != MO_64) {
3735                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3736                } else {
3737                    ot = MO_64;
3738                }
3739
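                    /* movbe is a byte-swapped load/store: implement it
                       directly as a big-endian memory access */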
3740                gen_lea_modrm(env, s, modrm);
3741                if ((b & 1) == 0) {
3742                    tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
3743                                       s->mem_index, ot | MO_BE);
3744                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3745                } else {
3746                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3747                                       s->mem_index, ot | MO_BE);
3748                }
3749                break;
3750
3751            case 0x0f2: /* andn Gy, By, Ey */
3752                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3753                    || !(s->prefix & PREFIX_VEX)
3754                    || s->vex_l != 0) {
3755                    goto illegal_op;
3756                }
3757                ot = mo_64_32(s->dflag);
3758                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3759                tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
3760                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3761                gen_op_update1_cc();
3762                set_cc_op(s, CC_OP_LOGICB + ot);
3763                break;
3764
3765            case 0x0f7: /* bextr Gy, Ey, By */
3766                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3767                    || !(s->prefix & PREFIX_VEX)
3768                    || s->vex_l != 0) {
3769                    goto illegal_op;
3770                }
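                    /* bextr: dest = (src >> start) & ((1 << len) - 1),
                       with start in By[7:0] and len in By[15:8] */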
3771                ot = mo_64_32(s->dflag);
3772                {
3773                    TCGv bound, zero;
3774
3775                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3776                    /* Extract START, and shift the operand.
3777                       Shifts larger than operand size get zeros.  */
3778                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3779                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0);
3780
3781                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3782                    zero = tcg_const_tl(0);
3783                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound,
3784                                       cpu_T[0], zero);
3785                    tcg_temp_free(zero);
3786
3787                    /* Extract the LEN into a mask.  Lengths larger than
3788                       operand size get all ones.  */
3789                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
3790                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
3791                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3792                                       cpu_A0, bound);
3793                    tcg_temp_free(bound);
3794                    tcg_gen_movi_tl(cpu_T[1], 1);
3795                    tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0);
3796                    tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1);
3797                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
3798
3799                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3800                    gen_op_update1_cc();
3801                    set_cc_op(s, CC_OP_LOGICB + ot);
3802                }
3803                break;
3804
3805            case 0x0f5: /* bzhi Gy, Ey, By */
3806                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3807                    || !(s->prefix & PREFIX_VEX)
3808                    || s->vex_l != 0) {
3809                    goto illegal_op;
3810                }
3811                ot = mo_64_32(s->dflag);
3812                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3813                tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
3814                {
3815                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3816                    /* Note that since we're using BMILG (in order to get O
3817                       cleared) we need to store the inverted condition as C.  */
3818                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3819                                       cpu_T[1], bound);
3820                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1],
3821                                       bound, bound, cpu_T[1]);
3822                    tcg_temp_free(bound);
3823                }
3824                tcg_gen_movi_tl(cpu_A0, -1);
3825                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]);
3826                tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0);
3827                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3828                gen_op_update1_cc();
3829                set_cc_op(s, CC_OP_BMILGB + ot);
3830                break;
3831
3832            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3833                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3834                    || !(s->prefix & PREFIX_VEX)
3835                    || s->vex_l != 0) {
3836                    goto illegal_op;
3837                }
3838                ot = mo_64_32(s->dflag);
3839                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
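                    /* mulx: EDX/RDX is the implicit multiplicand; the low
                       half of the product goes to By and the high half to
                       Gy, with no flags modified */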
3840                switch (ot) {
3841                default:
3842                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3843                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3844                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3845                                      cpu_tmp2_i32, cpu_tmp3_i32);
3846                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3847                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3848                    break;
3849#ifdef TARGET_X86_64
3850                case MO_64:
3851                    tcg_gen_mulu2_i64(cpu_T[0], cpu_T[1],
3852                                      cpu_T[0], cpu_regs[R_EDX]);
3853                    tcg_gen_mov_i64(cpu_regs[s->vex_v], cpu_T[0]);
3854                    tcg_gen_mov_i64(cpu_regs[reg], cpu_T[1]);
3855                    break;
3856#endif
3857                }
3858                break;
3859
3860            case 0x3f5: /* pdep Gy, By, Ey */
3861                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3862                    || !(s->prefix & PREFIX_VEX)
3863                    || s->vex_l != 0) {
3864                    goto illegal_op;
3865                }
3866                ot = mo_64_32(s->dflag);
3867                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3868                /* pdep deposits the low-order bits of By at the bit
3869                   positions set in the mask Ey; zero-extending the mask
3870                   also zero-extends the result.  */
3871                if (ot != MO_64) {
3872                    tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
3873                }
3874                tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
3875                gen_helper_pdep(cpu_regs[reg], cpu_T[1], cpu_T[0]);
3876                break;
3877
3878            case 0x2f5: /* pext Gy, By, Ey */
3879                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3880                    || !(s->prefix & PREFIX_VEX)
3881                    || s->vex_l != 0) {
3882                    goto illegal_op;
3883                }
3884                ot = mo_64_32(s->dflag);
3885                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3886                /* pext gathers the bits of By selected by the mask Ey
3887                   into the low-order bits of the result; zero-extending
3888                   the mask also zero-extends the result.  */
3889                if (ot != MO_64) {
3890                    tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
3891                }
3892                tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
3893                gen_helper_pext(cpu_regs[reg], cpu_T[1], cpu_T[0]);
3894                break;
3895
3896            case 0x1f6: /* adcx Gy, Ey */
3897            case 0x2f6: /* adox Gy, Ey */
3898                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3899                    goto illegal_op;
3900                } else {
3901                    TCGv carry_in, carry_out, zero;
3902                    int end_op;
3903
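                        /* adcx updates only C and adox only O, so chains of
                           the two may interleave; CC_OP_ADCX/ADOX/ADCOX
                           record whether C (cc_dst), O (cc_src2), or both
                           are live */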
3904                    ot = mo_64_32(s->dflag);
3905                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3906
3907                    /* Re-use the carry-out from a previous round.  */
3908                    TCGV_UNUSED(carry_in);
3909                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3910                    switch (s->cc_op) {
3911                    case CC_OP_ADCX:
3912                        if (b == 0x1f6) {
3913                            carry_in = cpu_cc_dst;
3914                            end_op = CC_OP_ADCX;
3915                        } else {
3916                            end_op = CC_OP_ADCOX;
3917                        }
3918                        break;
3919                    case CC_OP_ADOX:
3920                        if (b == 0x1f6) {
3921                            end_op = CC_OP_ADCOX;
3922                        } else {
3923                            carry_in = cpu_cc_src2;
3924                            end_op = CC_OP_ADOX;
3925                        }
3926                        break;
3927                    case CC_OP_ADCOX:
3928                        end_op = CC_OP_ADCOX;
3929                        carry_in = carry_out;
3930                        break;
3931                    default:
3932                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3933                        break;
3934                    }
3935                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3936                    if (TCGV_IS_UNUSED(carry_in)) {
3937                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3938                            gen_compute_eflags(s);
3939                        }
3940                        carry_in = cpu_tmp0;
3941                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
3942                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
3943                        tcg_gen_andi_tl(carry_in, carry_in, 1);
3944                    }
3945
3946                    switch (ot) {
3947#ifdef TARGET_X86_64
3948                    case MO_32:
3949                        /* If we know TL is 64-bit, and we want a 32-bit
3950                           result, just do everything in 64-bit arithmetic.  */
3951                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3952                        tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
3953                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
3954                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
3955                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
3956                        tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
3957                        break;
3958#endif
3959                    default:
3960                        /* Otherwise compute the carry-out in two steps.  */
3961                        zero = tcg_const_tl(0);
3962                        tcg_gen_add2_tl(cpu_T[0], carry_out,
3963                                        cpu_T[0], zero,
3964                                        carry_in, zero);
3965                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3966                                        cpu_regs[reg], carry_out,
3967                                        cpu_T[0], zero);
3968                        tcg_temp_free(zero);
3969                        break;
3970                    }
3971                    set_cc_op(s, end_op);
3972                }
3973                break;
3974
3975            case 0x1f7: /* shlx Gy, Ey, By */
3976            case 0x2f7: /* sarx Gy, Ey, By */
3977            case 0x3f7: /* shrx Gy, Ey, By */
3978                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3979                    || !(s->prefix & PREFIX_VEX)
3980                    || s->vex_l != 0) {
3981                    goto illegal_op;
3982                }
3983                ot = mo_64_32(s->dflag);
3984                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3985                if (ot == MO_64) {
3986                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
3987                } else {
3988                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31);
3989                }
3990                if (b == 0x1f7) {
3991                    tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
3992                } else if (b == 0x2f7) {
3993                    if (ot != MO_64) {
3994                        tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
3995                    }
3996                    tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
3997                } else {
3998                    if (ot != MO_64) {
3999                        tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
4000                    }
4001                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4002                }
4003                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
4004                break;
4005
4006            case 0x0f3:
4007            case 0x1f3:
4008            case 0x2f3:
4009            case 0x3f3: /* Group 17 */
4010                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4011                    || !(s->prefix & PREFIX_VEX)
4012                    || s->vex_l != 0) {
4013                    goto illegal_op;
4014                }
4015                ot = mo_64_32(s->dflag);
4016                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4017
4018                switch (reg & 7) {
4019                case 1: /* blsr By,Ey: dest = src & (src - 1) */
4020                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4021                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
4022                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
4023                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4024                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
4025                    set_cc_op(s, CC_OP_BMILGB + ot);
4026                    break;
4027                case 2: /* blsmsk By,Ey: dest = src ^ (src - 1) */
4028                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4029                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
4030                    tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
4031                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4032                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
4033                    set_cc_op(s, CC_OP_BMILGB + ot);
4034                    break;
4035                case 3: /* blsi By,Ey: dest = src & -src */
4036                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4037                    tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
4038                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
4039                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4040                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
4041                    set_cc_op(s, CC_OP_BMILGB + ot);
4042                    break;
4043                default:
4044                    goto illegal_op;
4045                }
4046                break;
4047
4048            default:
4049                goto illegal_op;
4050            }
4051            break;
4052
4053        case 0x03a:
4054        case 0x13a:
4055            b = modrm;
4056            modrm = cpu_ldub_code(env, s->pc++);
4057            rm = modrm & 7;
4058            reg = ((modrm >> 3) & 7) | rex_r;
4059            mod = (modrm >> 6) & 3;
4060            if (b1 >= 2) {
4061                goto illegal_op;
4062            }
4063
4064            sse_fn_eppi = sse_op_table7[b].op[b1];
4065            if (!sse_fn_eppi) {
4066                goto illegal_op;
4067            }
4068            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4069                goto illegal_op;
4070
4071            if (sse_fn_eppi == SSE_SPECIAL) {
4072                ot = mo_64_32(s->dflag);
4073                rm = (modrm & 7) | REX_B(s);
4074                if (mod != 3)
4075                    gen_lea_modrm(env, s, modrm);
4076                reg = ((modrm >> 3) & 7) | rex_r;
4077                val = cpu_ldub_code(env, s->pc++);
4078                switch (b) {
4079                case 0x14: /* pextrb */
4080                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4081                                            xmm_regs[reg].XMM_B(val & 15)));
4082                    if (mod == 3) {
4083                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4084                    } else {
4085                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
4086                                           s->mem_index, MO_UB);
4087                    }
4088                    break;
4089                case 0x15: /* pextrw */
4090                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4091                                            xmm_regs[reg].XMM_W(val & 7)));
4092                    if (mod == 3) {
4093                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4094                    } else {
4095                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
4096                                           s->mem_index, MO_LEUW);
4097                    }
4098                    break;
4099                case 0x16:
4100                    if (ot == MO_32) { /* pextrd */
4101                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4102                                        offsetof(CPUX86State,
4103                                                xmm_regs[reg].XMM_L(val & 3)));
4104                        if (mod == 3) {
4105                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4106                        } else {
4107                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4108                                                s->mem_index, MO_LEUL);
4109                        }
4110                    } else { /* pextrq */
4111#ifdef TARGET_X86_64
4112                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4113                                        offsetof(CPUX86State,
4114                                                xmm_regs[reg].XMM_Q(val & 1)));
4115                        if (mod == 3) {
4116                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4117                        } else {
4118                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4119                                                s->mem_index, MO_LEQ);
4120                        }
4121#else
4122                        goto illegal_op;
4123#endif
4124                    }
4125                    break;
4126                case 0x17: /* extractps */
4127                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4128                                            xmm_regs[reg].XMM_L(val & 3)));
4129                    if (mod == 3) {
4130                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4131                    } else {
4132                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
4133                                           s->mem_index, MO_LEUL);
4134                    }
4135                    break;
4136                case 0x20: /* pinsrb */
4137                    if (mod == 3) {
4138                        gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
4139                    } else {
4140                        tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
4141                                           s->mem_index, MO_UB);
4142                    }
4143                    tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4144                                            xmm_regs[reg].XMM_B(val & 15)));
4145                    break;
4146                case 0x21: /* insertps */
4147                    if (mod == 3) {
4148                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4149                                        offsetof(CPUX86State,xmm_regs[rm]
4150                                                .XMM_L((val >> 6) & 3)));
4151                    } else {
4152                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4153                                            s->mem_index, MO_LEUL);
4154                    }
4155                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4156                                    offsetof(CPUX86State,xmm_regs[reg]
4157                                            .XMM_L((val >> 4) & 3)));
4158                    if ((val >> 0) & 1)
4159                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4160                                        cpu_env, offsetof(CPUX86State,
4161                                                xmm_regs[reg].XMM_L(0)));
4162                    if ((val >> 1) & 1)
4163                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4164                                        cpu_env, offsetof(CPUX86State,
4165                                                xmm_regs[reg].XMM_L(1)));
4166                    if ((val >> 2) & 1)
4167                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4168                                        cpu_env, offsetof(CPUX86State,
4169                                                xmm_regs[reg].XMM_L(2)));
4170                    if ((val >> 3) & 1)
4171                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4172                                        cpu_env, offsetof(CPUX86State,
4173                                                xmm_regs[reg].XMM_L(3)));
4174                    break;
4175                case 0x22:
4176                    if (ot == MO_32) { /* pinsrd */
4177                        if (mod == 3) {
4178                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4179                        } else {
4180                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4181                                                s->mem_index, MO_LEUL);
4182                        }
4183                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4184                                        offsetof(CPUX86State,
4185                                                xmm_regs[reg].XMM_L(val & 3)));
4186                    } else { /* pinsrq */
4187#ifdef TARGET_X86_64
4188                        if (mod == 3) {
4189                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4190                        } else {
4191                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4192                                                s->mem_index, MO_LEQ);
4193                        }
4194                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4195                                        offsetof(CPUX86State,
4196                                                xmm_regs[reg].XMM_Q(val & 1)));
4197#else
4198                        goto illegal_op;
4199#endif
4200                    }
4201                    break;
4202                }
4203                return;
4204            }
4205
4206            if (b1) {
4207                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4208                if (mod == 3) {
4209                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4210                } else {
4211                    op2_offset = offsetof(CPUX86State,xmm_t0);
4212                    gen_lea_modrm(env, s, modrm);
4213                    gen_ldo_env_A0(s, op2_offset);
4214                }
4215            } else {
4216                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4217                if (mod == 3) {
4218                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4219                } else {
4220                    op2_offset = offsetof(CPUX86State,mmx_t0);
4221                    gen_lea_modrm(env, s, modrm);
4222                    gen_ldq_env_A0(s, op2_offset);
4223                }
4224            }
4225            val = cpu_ldub_code(env, s->pc++);
4226
4227            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4228                set_cc_op(s, CC_OP_EFLAGS);
4229
4230                if (s->dflag == MO_64) {
4231                    /* Tell the helper to operate on the full 64-bit gp registers */
4232                    val |= 1 << 8;
4233                }
4234            }
4235
4236            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4237            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4238            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4239            break;
4240
4241        case 0x33a:
4242            /* Various integer extensions at 0f 3a f[0-f].  */
4243            b = modrm | (b1 << 8);
4244            modrm = cpu_ldub_code(env, s->pc++);
4245            reg = ((modrm >> 3) & 7) | rex_r;
4246
4247            switch (b) {
4248            case 0x3f0: /* rorx Gy,Ey, Ib */
4249                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4250                    || !(s->prefix & PREFIX_VEX)
4251                    || s->vex_l != 0) {
4252                    goto illegal_op;
4253                }
4254                ot = mo_64_32(s->dflag);
4255                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4256                b = cpu_ldub_code(env, s->pc++);
4257                if (ot == MO_64) {
4258                    tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
4259                } else {
4260                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4261                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4262                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
4263                }
4264                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
4265                break;
4266
4267            default:
4268                goto illegal_op;
4269            }
4270            break;
4271
4272        default:
4273            goto illegal_op;
4274        }
4275    } else {
4276        /* generic MMX or SSE operation */
4277        switch(b) {
4278        case 0x70: /* pshufw/pshufd/pshufhw/pshuflw */
4279        case 0xc6: /* shufps/shufpd */
4280        case 0xc2: /* compare insns */
4281            s->rip_offset = 1;
4282            break;
4283        default:
4284            break;
4285        }
4286        if (is_xmm) {
4287            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4288            if (mod != 3) {
4289                int sz = 4;
4290
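                    /* sz is a log2 access size: scalar and comis-style ops
                       read only the 32 or 64 bits they use rather than the
                       whole 128-bit register */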
4291                gen_lea_modrm(env, s, modrm);
4292                op2_offset = offsetof(CPUX86State,xmm_t0);
4293
4294                switch (b) {
4295                case 0x50 ... 0x5a:
4296                case 0x5c ... 0x5f:
4297                case 0xc2:
4298                    /* Most sse scalar operations.  */
4299                    if (b1 == 2) {
4300                        sz = 2;
4301                    } else if (b1 == 3) {
4302                        sz = 3;
4303                    }
4304                    break;
4305
4306                case 0x2e:  /* ucomis[sd] */
4307                case 0x2f:  /* comis[sd] */
4308                    if (b1 == 0) {
4309                        sz = 2;
4310                    } else {
4311                        sz = 3;
4312                    }
4313                    break;
4314                }
4315
4316                switch (sz) {
4317                case 2:
4318                    /* 32 bit access */
4319                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
4320                    tcg_gen_st32_tl(cpu_T[0], cpu_env,
4321                                    offsetof(CPUX86State,xmm_t0.XMM_L(0)));
4322                    break;
4323                case 3:
4324                    /* 64 bit access */
4325                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_D(0)));
4326                    break;
4327                default:
4328                    /* 128 bit access */
4329                    gen_ldo_env_A0(s, op2_offset);
4330                    break;
4331                }
4332            } else {
4333                rm = (modrm & 7) | REX_B(s);
4334                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4335            }
4336        } else {
4337            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4338            if (mod != 3) {
4339                gen_lea_modrm(env, s, modrm);
4340                op2_offset = offsetof(CPUX86State,mmx_t0);
4341                gen_ldq_env_A0(s, op2_offset);
4342            } else {
4343                rm = (modrm & 7);
4344                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4345            }
4346        }
4347        switch(b) {
4348        case 0x0f: /* 3DNow! data insns */
4349            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
4350                goto illegal_op;
4351            val = cpu_ldub_code(env, s->pc++);
4352            sse_fn_epp = sse_op_table5[val];
4353            if (!sse_fn_epp) {
4354                goto illegal_op;
4355            }
4356            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4357            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4358            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4359            break;
4360        case 0x70: /* pshufw/pshufd/pshufhw/pshuflw */
4361        case 0xc6: /* shufps/shufpd */
4362            val = cpu_ldub_code(env, s->pc++);
4363            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4364            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4365            /* XXX: introduce a new table? */
4366            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4367            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4368            break;
4369        case 0xc2:
4370            /* compare insns */
4371            val = cpu_ldub_code(env, s->pc++);
4372            if (val >= 8)
4373                goto illegal_op;
4374            sse_fn_epp = sse_op_table4[val][b1];
4375
4376            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4377            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4378            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4379            break;
4380        case 0xf7:
4381            /* maskmov: the store address is implicitly DS:rDI, prepare A0 */
4382            if (mod != 3)
4383                goto illegal_op;
4384            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4385            gen_extu(s->aflag, cpu_A0);
4386            gen_add_A0_ds_seg(s);
4387
4388            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4389            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4390            /* XXX: introduce a new table? */
4391            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4392            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4393            break;
4394        default:
4395            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4396            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4397            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4398            break;
4399        }
4400        if (b == 0x2e || b == 0x2f) {
4401            set_cc_op(s, CC_OP_EFLAGS);
4402        }
4403    }
4404}
4405
4406/* Convert one instruction.  Set s->is_jmp if the translation must
4407   be stopped.  Return the next pc value.  */
4408static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4409                               target_ulong pc_start)
4410{
4411    int b, prefixes;
4412    int shift;
4413    TCGMemOp ot, aflag, dflag;
4414    int modrm, reg, rm, mod, op, opreg, val;
4415    target_ulong next_eip, tval;
4416    int rex_w, rex_r;
4417
4418    s->pc = pc_start;
4419    prefixes = 0;
4420    s->override = -1;
4421    rex_w = -1;
4422    rex_r = 0;
4423#ifdef TARGET_X86_64
4424    s->rex_x = 0;
4425    s->rex_b = 0;
4426    x86_64_hregs = 0;
4427#endif
4428    s->rip_offset = 0; /* for relative ip address */
4429    s->vex_l = 0;
4430    s->vex_v = 0;
4431 next_byte:
4432    b = cpu_ldub_code(env, s->pc);
4433    s->pc++;
4434    /* Collect prefixes.  */
4435    switch (b) {
4436    case 0xf3:
4437        prefixes |= PREFIX_REPZ;
4438        goto next_byte;
4439    case 0xf2:
4440        prefixes |= PREFIX_REPNZ;
4441        goto next_byte;
4442    case 0xf0:
4443        prefixes |= PREFIX_LOCK;
4444        goto next_byte;
4445    case 0x2e:
4446        s->override = R_CS;
4447        goto next_byte;
4448    case 0x36:
4449        s->override = R_SS;
4450        goto next_byte;
4451    case 0x3e:
4452        s->override = R_DS;
4453        goto next_byte;
4454    case 0x26:
4455        s->override = R_ES;
4456        goto next_byte;
4457    case 0x64:
4458        s->override = R_FS;
4459        goto next_byte;
4460    case 0x65:
4461        s->override = R_GS;
4462        goto next_byte;
4463    case 0x66:
4464        prefixes |= PREFIX_DATA;
4465        goto next_byte;
4466    case 0x67:
4467        prefixes |= PREFIX_ADR;
4468        goto next_byte;
4469#ifdef TARGET_X86_64
4470    case 0x40 ... 0x4f:
4471        if (CODE64(s)) {
4472            /* REX prefix */
4473            rex_w = (b >> 3) & 1;
4474            rex_r = (b & 0x4) << 1;
4475            s->rex_x = (b & 0x2) << 2;
4476            REX_B(s) = (b & 0x1) << 3;
4477            x86_64_hregs = 1; /* select uniform byte register addressing */
4478            goto next_byte;
4479        }
4480        break;
4481#endif
4482    case 0xc5: /* 2-byte VEX */
4483    case 0xc4: /* 3-byte VEX */
4484        /* VEX prefixes are only recognized in 32/64-bit protected mode
4485           code; elsewhere c4 and c5 always decode as LES and LDS.  */
4486        if (s->code32 && !s->vm86) {
4487            static const int pp_prefix[4] = {
4488                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4489            };
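                /* VEX byte layout (vvvv, R, X and B are stored inverted):
                   2-byte (c5): [R vvvv L pp]
                   3-byte (c4): [R X B m-mmmm] [W vvvv L pp]  */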
4490            int vex3, vex2 = cpu_ldub_code(env, s->pc);
4491
4492            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4493                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4494                   otherwise the instruction is LES or LDS.  */
4495                break;
4496            }
4497            s->pc++;
4498
4499            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4500            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4501                            | PREFIX_LOCK | PREFIX_DATA)) {
4502                goto illegal_op;
4503            }
4504#ifdef TARGET_X86_64
4505            if (x86_64_hregs) {
4506                goto illegal_op;
4507            }
4508#endif
4509            rex_r = (~vex2 >> 4) & 8;
4510            if (b == 0xc5) {
4511                vex3 = vex2;
4512                b = cpu_ldub_code(env, s->pc++);
4513            } else {
4514#ifdef TARGET_X86_64
4515                s->rex_x = (~vex2 >> 3) & 8;
4516                s->rex_b = (~vex2 >> 2) & 8;
4517#endif
4518                vex3 = cpu_ldub_code(env, s->pc++);
4519                rex_w = (vex3 >> 7) & 1;
4520                switch (vex2 & 0x1f) {
4521                case 0x01: /* Implied 0f leading opcode bytes.  */
4522                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4523                    break;
4524                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4525                    b = 0x138;
4526                    break;
4527                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4528                    b = 0x13a;
4529                    break;
4530                default:   /* Reserved for future use.  */
4531                    goto illegal_op;
4532                }
4533            }
4534            s->vex_v = (~vex3 >> 3) & 0xf;
4535            s->vex_l = (vex3 >> 2) & 1;
4536            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4537        }
4538        break;
4539    }
4540
4541    /* Post-process prefixes.  */
4542    if (CODE64(s)) {
4543        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4544           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4545           over 0x66 if both are present.  */
4546        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4547        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4548        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4549    } else {
4550        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4551        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4552            dflag = MO_32;
4553        } else {
4554            dflag = MO_16;
4555        }
4556        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4557        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4558            aflag = MO_32;
4559        } else {
4560            aflag = MO_16;
4561        }
4562    }
4563
4564    s->prefix = prefixes;
4565    s->aflag = aflag;
4566    s->dflag = dflag;
4567
4568    /* lock generation */
4569    if (prefixes & PREFIX_LOCK)
4570        gen_helper_lock();
4571
4572    /* now check op code */
4573 reswitch:
4574    switch(b) {
4575    case 0x0f:
4576        /**************************/
4577        /* extended op code */
4578        b = cpu_ldub_code(env, s->pc++) | 0x100;
4579        goto reswitch;
4580
4581        /**************************/
4582        /* arith & logic */
4583    case 0x00 ... 0x05:
4584    case 0x08 ... 0x0d:
4585    case 0x10 ... 0x15:
4586    case 0x18 ... 0x1d:
4587    case 0x20 ... 0x25:
4588    case 0x28 ... 0x2d:
4589    case 0x30 ... 0x35:
4590    case 0x38 ... 0x3d:
4591        {
4592            int op, f, val;
4593            op = (b >> 3) & 7;
4594            f = (b >> 1) & 3;
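            /* For the 0x00-0x3d arith opcodes: b[5:3] selects the operation
               (add/or/adc/sbb/and/sub/xor/cmp), f = b[2:1] selects the
               operand form (0: Ev,Gv  1: Gv,Ev  2: AL/eAX,imm), and b[0]
               selects byte vs. full operand size.  */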
4595
4596            ot = mo_b_d(b, dflag);
4597
4598            switch(f) {
4599            case 0: /* OP Ev, Gv */
4600                modrm = cpu_ldub_code(env, s->pc++);
4601                reg = ((modrm >> 3) & 7) | rex_r;
4602                mod = (modrm >> 6) & 3;
4603                rm = (modrm & 7) | REX_B(s);
4604                if (mod != 3) {
4605                    gen_lea_modrm(env, s, modrm);
4606                    opreg = OR_TMP0;
4607                } else if (op == OP_XORL && rm == reg) {
4608                xor_zero:
4609                    /* xor reg, reg optimisation */
4610                    set_cc_op(s, CC_OP_CLR);
4611                    tcg_gen_movi_tl(cpu_T[0], 0);
4612                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
4613                    break;
4614                } else {
4615                    opreg = rm;
4616                }
4617                gen_op_mov_v_reg(ot, cpu_T[1], reg);
4618                gen_op(s, op, ot, opreg);
4619                break;
4620            case 1: /* OP Gv, Ev */
4621                modrm = cpu_ldub_code(env, s->pc++);
4622                mod = (modrm >> 6) & 3;
4623                reg = ((modrm >> 3) & 7) | rex_r;
4624                rm = (modrm & 7) | REX_B(s);
4625                if (mod != 3) {
4626                    gen_lea_modrm(env, s, modrm);
4627                    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
4628                } else if (op == OP_XORL && rm == reg) {
4629                    goto xor_zero;
4630                } else {
4631                    gen_op_mov_v_reg(ot, cpu_T[1], rm);
4632                }
4633                gen_op(s, op, ot, reg);
4634                break;
4635            case 2: /* OP A, Iv */
4636                val = insn_get(env, s, ot);
4637                tcg_gen_movi_tl(cpu_T[1], val);
4638                gen_op(s, op, ot, OR_EAX);
4639                break;
4640            }
4641        }
4642        break;
4643
4644    case 0x82:
4645        if (CODE64(s))
4646            goto illegal_op;
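        /* fall through */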
4647    case 0x80: /* GRP1 */
4648    case 0x81:
4649    case 0x83:
4650        {
4651            int val;
4652
4653            ot = mo_b_d(b, dflag);
4654
4655            modrm = cpu_ldub_code(env, s->pc++);
4656            mod = (modrm >> 6) & 3;
4657            rm = (modrm & 7) | REX_B(s);
4658            op = (modrm >> 3) & 7;
4659
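            /* rip_offset is the number of immediate bytes that still follow
               the ModRM/SIB/displacement; gen_lea_modrm needs it so that
               RIP-relative addressing (64-bit mode) resolves to the address
               past the immediate.  */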
4660            if (mod != 3) {
4661                if (b == 0x83)
4662                    s->rip_offset = 1;
4663                else
4664                    s->rip_offset = insn_const_size(ot);
4665                gen_lea_modrm(env, s, modrm);
4666                opreg = OR_TMP0;
4667            } else {
4668                opreg = rm;
4669            }
4670
4671            switch(b) {
4672            default:
4673            case 0x80:
4674            case 0x81:
4675            case 0x82:
4676                val = insn_get(env, s, ot);
4677                break;
4678            case 0x83:
4679                val = (int8_t)insn_get(env, s, MO_8);
4680                break;
4681            }
4682            tcg_gen_movi_tl(cpu_T[1], val);
4683            gen_op(s, op, ot, opreg);
4684        }
4685        break;
4686
4687        /**************************/
4688        /* inc, dec, and other misc arith */
4689    case 0x40 ... 0x47: /* inc Gv */
4690        ot = dflag;
4691        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4692        break;
4693    case 0x48 ... 0x4f: /* dec Gv */
4694        ot = dflag;
4695        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4696        break;
4697    case 0xf6: /* GRP3 */
4698    case 0xf7:
4699        ot = mo_b_d(b, dflag);
4700
4701        modrm = cpu_ldub_code(env, s->pc++);
4702        mod = (modrm >> 6) & 3;
4703        rm = (modrm & 7) | REX_B(s);
4704        op = (modrm >> 3) & 7;
4705        if (mod != 3) {
4706            if (op == 0)
4707                s->rip_offset = insn_const_size(ot);
4708            gen_lea_modrm(env, s, modrm);
4709            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
4710        } else {
4711            gen_op_mov_v_reg(ot, cpu_T[0], rm);
4712        }
4713
4714        switch(op) {
4715        case 0: /* test */
4716            val = insn_get(env, s, ot);
4717            tcg_gen_movi_tl(cpu_T[1], val);
4718            gen_op_testl_T0_T1_cc();
4719            set_cc_op(s, CC_OP_LOGICB + ot);
4720            break;
4721        case 2: /* not */
4722            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
4723            if (mod != 3) {
4724                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
4725            } else {
4726                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4727            }
4728            break;
4729        case 3: /* neg */
4730            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
4731            if (mod != 3) {
4732                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
4733            } else {
4734                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4735            }
4736            gen_op_update_neg_cc();
4737            set_cc_op(s, CC_OP_SUBB + ot);
4738            break;
4739        case 4: /* mul */
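            /* Unsigned multiply into AX/DX:AX/EDX:EAX/RDX:RAX.  cc_src is
               set so that it is non-zero exactly when the upper half of the
               product is non-zero; CC_OP_MUL* derives CF = OF from that.  */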
4740            switch(ot) {
4741            case MO_8:
4742                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
4743                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
4744                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
4745                /* XXX: use 32 bit mul which could be faster */
4746                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4747                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4748                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4749                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
4750                set_cc_op(s, CC_OP_MULB);
4751                break;
4752            case MO_16:
4753                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
4754                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4755                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
4756                /* XXX: use 32 bit mul which could be faster */
4757                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4758                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4759                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4760                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
4761                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
4762                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4763                set_cc_op(s, CC_OP_MULW);
4764                break;
4765            default:
4766            case MO_32:
4767                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4768                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4769                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4770                                  cpu_tmp2_i32, cpu_tmp3_i32);
4771                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4772                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4773                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4774                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4775                set_cc_op(s, CC_OP_MULL);
4776                break;
4777#ifdef TARGET_X86_64
4778            case MO_64:
4779                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4780                                  cpu_T[0], cpu_regs[R_EAX]);
4781                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4782                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4783                set_cc_op(s, CC_OP_MULQ);
4784                break;
4785#endif
4786            }
4787            break;
4788        case 5: /* imul */
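            /* Signed multiply.  cc_src is set so that it is non-zero exactly
               when the high part of the product differs from the sign
               extension of the low part, i.e. when the product overflows the
               destination; CC_OP_MUL* derives CF = OF from that.  */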
4789            switch(ot) {
4790            case MO_8:
4791                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
4792                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
4793                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
4794                /* XXX: use 32 bit mul which could be faster */
4795                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4796                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4797                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4798                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
4799                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4800                set_cc_op(s, CC_OP_MULB);
4801                break;
4802            case MO_16:
4803                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
4804                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
4805                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
4806                /* XXX: use 32 bit mul which could be faster */
4807                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4808                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4809                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4810                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
4811                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4812                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
4813                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
4814                set_cc_op(s, CC_OP_MULW);
4815                break;
4816            default:
4817            case MO_32:
4818                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4819                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4820                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4821                                  cpu_tmp2_i32, cpu_tmp3_i32);
4822                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4823                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4824                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4825                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4826                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4827                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4828                set_cc_op(s, CC_OP_MULL);
4829                break;
4830#ifdef TARGET_X86_64
4831            case MO_64:
4832                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4833                                  cpu_T[0], cpu_regs[R_EAX]);
4834                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4835                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4836                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4837                set_cc_op(s, CC_OP_MULQ);
4838                break;
4839#endif
4840            }
4841            break;
4842        case 6: /* div */
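            /* div and idiv are done in helpers so that they can raise #DE
               on division by zero or quotient overflow.  */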
4843            switch(ot) {
4844            case MO_8:
4845                gen_helper_divb_AL(cpu_env, cpu_T[0]);
4846                break;
4847            case MO_16:
4848                gen_helper_divw_AX(cpu_env, cpu_T[0]);
4849                break;
4850            default:
4851            case MO_32:
4852                gen_helper_divl_EAX(cpu_env, cpu_T[0]);
4853                break;
4854#ifdef TARGET_X86_64
4855            case MO_64:
4856                gen_helper_divq_EAX(cpu_env, cpu_T[0]);
4857                break;
4858#endif
4859            }
4860            break;
4861        case 7: /* idiv */
4862            switch(ot) {
4863            case MO_8:
4864                gen_helper_idivb_AL(cpu_env, cpu_T[0]);
4865                break;
4866            case MO_16:
4867                gen_helper_idivw_AX(cpu_env, cpu_T[0]);
4868                break;
4869            default:
4870            case MO_32:
4871                gen_helper_idivl_EAX(cpu_env, cpu_T[0]);
4872                break;
4873#ifdef TARGET_X86_64
4874            case MO_64:
4875                gen_helper_idivq_EAX(cpu_env, cpu_T[0]);
4876                break;
4877#endif
4878            }
4879            break;
4880        default:
4881            goto illegal_op;
4882        }
4883        break;
4884
4885    case 0xfe: /* GRP4 */
4886    case 0xff: /* GRP5 */
4887        ot = mo_b_d(b, dflag);
4888
4889        modrm = cpu_ldub_code(env, s->pc++);
4890        mod = (modrm >> 6) & 3;
4891        rm = (modrm & 7) | REX_B(s);
4892        op = (modrm >> 3) & 7;
4893        if (op >= 2 && b == 0xfe) {
4894            goto illegal_op;
4895        }
4896        if (CODE64(s)) {
4897            if (op == 2 || op == 4) {
4898                /* operand size for jumps is 64 bit */
4899                ot = MO_64;
4900            } else if (op == 3 || op == 5) {
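                /* lcall/ljmp Ev: the far-pointer offset is 32-bit by
                   default, 64-bit with REX.W, 16-bit with a 0x66 prefix.  */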
4901                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4902            } else if (op == 6) {
4903                /* default push size is 64 bit */
4904                ot = mo_pushpop(s, dflag);
4905            }
4906        }
4907        if (mod != 3) {
4908            gen_lea_modrm(env, s, modrm);
4909            if (op >= 2 && op != 3 && op != 5)
4910                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
4911        } else {
4912            gen_op_mov_v_reg(ot, cpu_T[0], rm);
4913        }
4914
4915        switch(op) {
4916        case 0: /* inc Ev */
4917            if (mod != 3)
4918                opreg = OR_TMP0;
4919            else
4920                opreg = rm;
4921            gen_inc(s, ot, opreg, 1);
4922            break;
4923        case 1: /* dec Ev */
4924            if (mod != 3)
4925                opreg = OR_TMP0;
4926            else
4927                opreg = rm;
4928            gen_inc(s, ot, opreg, -1);
4929            break;
4930        case 2: /* call Ev */
4931            /* XXX: optimize: when the operand comes from memory, the zero-extension ('and') is unnecessary */
4932            if (dflag == MO_16) {
4933                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4934            }
4935            next_eip = s->pc - s->cs_base;
4936            tcg_gen_movi_tl(cpu_T[1], next_eip);
4937            gen_push_v(s, cpu_T[1]);
4938            gen_op_jmp_v(cpu_T[0]);
4939            gen_eob(s);
4940            break;
4941        case 3: /* lcall Ev */
4942            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
4943            gen_add_A0_im(s, 1 << ot);
4944            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
4945        do_lcall:
4946            if (s->pe && !s->vm86) {
4947                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4948                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
4949                                           tcg_const_i32(dflag - 1),
4950                                           tcg_const_tl(s->pc - s->cs_base));
4951            } else {
4952                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4953                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T[1],
4954                                      tcg_const_i32(dflag - 1),
4955                                      tcg_const_i32(s->pc - s->cs_base));
4956            }
4957            gen_eob(s);
4958            break;
4959        case 4: /* jmp Ev */
4960            if (dflag == MO_16) {
4961                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4962            }
4963            gen_op_jmp_v(cpu_T[0]);
4964            gen_eob(s);
4965            break;
4966        case 5: /* ljmp Ev */
4967            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
4968            gen_add_A0_im(s, 1 << ot);
4969            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
4970        do_ljmp:
4971            if (s->pe && !s->vm86) {
4972                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4973                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
4974                                          tcg_const_tl(s->pc - s->cs_base));
4975            } else {
4976                gen_op_movl_seg_T0_vm(R_CS);
4977                gen_op_jmp_v(cpu_T[1]);
4978            }
4979            gen_eob(s);
4980            break;
4981        case 6: /* push Ev */
4982            gen_push_v(s, cpu_T[0]);
4983            break;
4984        default:
4985            goto illegal_op;
4986        }
4987        break;
4988
4989    case 0x84: /* test Ev, Gv */
4990    case 0x85:
4991        ot = mo_b_d(b, dflag);
4992
4993        modrm = cpu_ldub_code(env, s->pc++);
4994        reg = ((modrm >> 3) & 7) | rex_r;
4995
4996        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4997        gen_op_mov_v_reg(ot, cpu_T[1], reg);
4998        gen_op_testl_T0_T1_cc();
4999        set_cc_op(s, CC_OP_LOGICB + ot);
5000        break;
5001
5002    case 0xa8: /* test eAX, Iv */
5003    case 0xa9:
5004        ot = mo_b_d(b, dflag);
5005        val = insn_get(env, s, ot);
5006
5007        gen_op_mov_v_reg(ot, cpu_T[0], OR_EAX);
5008        tcg_gen_movi_tl(cpu_T[1], val);
5009        gen_op_testl_T0_T1_cc();
5010        set_cc_op(s, CC_OP_LOGICB + ot);
5011        break;
5012
5013    case 0x98: /* CWDE/CBW */
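        /* Sign-extend the low half of the accumulator into the full
           accumulator: AL->AX (CBW), AX->EAX (CWDE), EAX->RAX (CDQE).  */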
5014        switch (dflag) {
5015#ifdef TARGET_X86_64
5016        case MO_64:
5017            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
5018            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
5019            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T[0]);
5020            break;
5021#endif
5022        case MO_32:
5023            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
5024            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5025            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T[0]);
5026            break;
5027        case MO_16:
5028            gen_op_mov_v_reg(MO_8, cpu_T[0], R_EAX);
5029            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
5030            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
5031            break;
5032        default:
5033            tcg_abort();
5034        }
5035        break;
5036    case 0x99: /* CDQ/CWD */
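        /* Replicate the accumulator's sign bit into DX/EDX/RDX:
           AX->DX:AX (CWD), EAX->EDX:EAX (CDQ), RAX->RDX:RAX (CQO).  */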
5037        switch (dflag) {
5038#ifdef TARGET_X86_64
5039        case MO_64:
5040            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EAX);
5041            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
5042            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T[0]);
5043            break;
5044#endif
5045        case MO_32:
5046            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
5047            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
5048            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
5049            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T[0]);
5050            break;
5051        case MO_16:
5052            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
5053            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5054            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
5055            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
5056            break;
5057        default:
5058            tcg_abort();
5059        }
5060        break;
5061    case 0x1af: /* imul Gv, Ev */
5062    case 0x69: /* imul Gv, Ev, I */
5063    case 0x6b:
5064        ot = dflag;
5065        modrm = cpu_ldub_code(env, s->pc++);
5066        reg = ((modrm >> 3) & 7) | rex_r;
5067        if (b == 0x69)
5068            s->rip_offset = insn_const_size(ot);
5069        else if (b == 0x6b)
5070            s->rip_offset = 1;
5071        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5072        if (b == 0x69) {
5073            val = insn_get(env, s, ot);
5074            tcg_gen_movi_tl(cpu_T[1], val);
5075        } else if (b == 0x6b) {
5076            val = (int8_t)insn_get(env, s, MO_8);
5077            tcg_gen_movi_tl(cpu_T[1], val);
5078        } else {
5079            gen_op_mov_v_reg(ot, cpu_T[1], reg);
5080        }
5081        switch (ot) {
5082#ifdef TARGET_X86_64
5083        case MO_64:
5084            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
5085            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5086            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5087            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
5088            break;
5089#endif
5090        case MO_32:
5091            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
5092            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
5093            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5094                              cpu_tmp2_i32, cpu_tmp3_i32);
5095            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5096            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5097            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5098            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5099            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5100            break;
5101        default:
5102            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5103            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
5104            /* XXX: use 32 bit mul which could be faster */
5105            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
5106            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
5107            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
5108            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
5109            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
5110            break;
5111        }
5112        set_cc_op(s, CC_OP_MULB + ot);
5113        break;
5114    case 0x1c0:
5115    case 0x1c1: /* xadd Ev, Gv */
5116        ot = mo_b_d(b, dflag);
5117        modrm = cpu_ldub_code(env, s->pc++);
5118        reg = ((modrm >> 3) & 7) | rex_r;
5119        mod = (modrm >> 6) & 3;
5120        if (mod == 3) {
5121            rm = (modrm & 7) | REX_B(s);
5122            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5123            gen_op_mov_v_reg(ot, cpu_T[1], rm);
5124            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
5125            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5126            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
5127        } else {
5128            gen_lea_modrm(env, s, modrm);
5129            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5130            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
5131            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
5132            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5133            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5134        }
5135        gen_op_update2_cc();
5136        set_cc_op(s, CC_OP_ADDB + ot);
5137        break;
5138    case 0x1b0:
5139    case 0x1b1: /* cmpxchg Ev, Gv */
5140        {
5141            TCGLabel *label1, *label2;
5142            TCGv t0, t1, t2, a0;
5143
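            /* cmpxchg: compare the accumulator with the destination; if
               equal, store the source into the destination, otherwise load
               the destination into the accumulator.  Flags are set as for a
               sub of the two compared values.  */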
5144            ot = mo_b_d(b, dflag);
5145            modrm = cpu_ldub_code(env, s->pc++);
5146            reg = ((modrm >> 3) & 7) | rex_r;
5147            mod = (modrm >> 6) & 3;
5148            t0 = tcg_temp_local_new();
5149            t1 = tcg_temp_local_new();
5150            t2 = tcg_temp_local_new();
5151            a0 = tcg_temp_local_new();
5152            gen_op_mov_v_reg(ot, t1, reg);
5153            if (mod == 3) {
5154                rm = (modrm & 7) | REX_B(s);
5155                gen_op_mov_v_reg(ot, t0, rm);
5156            } else {
5157                gen_lea_modrm(env, s, modrm);
5158                tcg_gen_mov_tl(a0, cpu_A0);
5159                gen_op_ld_v(s, ot, t0, a0);
5160                rm = 0; /* avoid warning */
5161            }
5162            label1 = gen_new_label();
5163            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
5164            gen_extu(ot, t0);
5165            gen_extu(ot, t2);
5166            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
5167            label2 = gen_new_label();
5168            if (mod == 3) {
5169                gen_op_mov_reg_v(ot, R_EAX, t0);
5170                tcg_gen_br(label2);
5171                gen_set_label(label1);
5172                gen_op_mov_reg_v(ot, rm, t1);
5173            } else {
5174                /* perform no-op store cycle like physical cpu; must be
5175                   before changing accumulator to ensure idempotency if
5176                   the store faults and the instruction is restarted */
5177                gen_op_st_v(s, ot, t0, a0);
5178                gen_op_mov_reg_v(ot, R_EAX, t0);
5179                tcg_gen_br(label2);
5180                gen_set_label(label1);
5181                gen_op_st_v(s, ot, t1, a0);
5182            }
5183            gen_set_label(label2);
5184            tcg_gen_mov_tl(cpu_cc_src, t0);
5185            tcg_gen_mov_tl(cpu_cc_srcT, t2);
5186            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
5187            set_cc_op(s, CC_OP_SUBB + ot);
5188            tcg_temp_free(t0);
5189            tcg_temp_free(t1);
5190            tcg_temp_free(t2);
5191            tcg_temp_free(a0);
5192        }
5193        break;
5194    case 0x1c7: /* cmpxchg8b */
5195        modrm = cpu_ldub_code(env, s->pc++);
5196        mod = (modrm >> 6) & 3;
5197        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5198            goto illegal_op;
5199#ifdef TARGET_X86_64
5200        if (dflag == MO_64) {
5201            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5202                goto illegal_op;
5203            gen_lea_modrm(env, s, modrm);
5204            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5205        } else
5206#endif
5207        {
5208            if (!(s->cpuid_features & CPUID_CX8))
5209                goto illegal_op;
5210            gen_lea_modrm(env, s, modrm);
5211            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5212        }
5213        set_cc_op(s, CC_OP_EFLAGS);
5214        break;
5215
5216        /**************************/
5217        /* push/pop */
5218    case 0x50 ... 0x57: /* push */
5219        gen_op_mov_v_reg(MO_32, cpu_T[0], (b & 7) | REX_B(s));
5220        gen_push_v(s, cpu_T[0]);
5221        break;
5222    case 0x58 ... 0x5f: /* pop */
5223        ot = gen_pop_T0(s);
5224        /* NOTE: order is important for pop %sp */
5225        gen_pop_update(s, ot);
5226        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T[0]);
5227        break;
5228    case 0x60: /* pusha */
5229        if (CODE64(s))
5230            goto illegal_op;
5231        gen_pusha(s);
5232        break;
5233    case 0x61: /* popa */
5234        if (CODE64(s))
5235            goto illegal_op;
5236        gen_popa(s);
5237        break;
5238    case 0x68: /* push Iv */
5239    case 0x6a:
5240        ot = mo_pushpop(s, dflag);
5241        if (b == 0x68)
5242            val = insn_get(env, s, ot);
5243        else
5244            val = (int8_t)insn_get(env, s, MO_8);
5245        tcg_gen_movi_tl(cpu_T[0], val);
5246        gen_push_v(s, cpu_T[0]);
5247        break;
5248    case 0x8f: /* pop Ev */
5249        modrm = cpu_ldub_code(env, s->pc++);
5250        mod = (modrm >> 6) & 3;
5251        ot = gen_pop_T0(s);
5252        if (mod == 3) {
5253            /* NOTE: order is important for pop %sp */
5254            gen_pop_update(s, ot);
5255            rm = (modrm & 7) | REX_B(s);
5256            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
5257        } else {
5258            /* NOTE: order is important too for MMU exceptions */
5259            s->popl_esp_hack = 1 << ot;
5260            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5261            s->popl_esp_hack = 0;
5262            gen_pop_update(s, ot);
5263        }
5264        break;
5265    case 0xc8: /* enter */
5266        {
5267            int level;
5268            val = cpu_lduw_code(env, s->pc);
5269            s->pc += 2;
5270            level = cpu_ldub_code(env, s->pc++);
5271            gen_enter(s, val, level);
5272        }
5273        break;
5274    case 0xc9: /* leave */
5275        /* XXX: exception not precise (ESP is updated before potential exception) */
5276        if (CODE64(s)) {
5277            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EBP);
5278            gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[0]);
5279        } else if (s->ss32) {
5280            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
5281            gen_op_mov_reg_v(MO_32, R_ESP, cpu_T[0]);
5282        } else {
5283            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EBP);
5284            gen_op_mov_reg_v(MO_16, R_ESP, cpu_T[0]);
5285        }
5286        ot = gen_pop_T0(s);
5287        gen_op_mov_reg_v(ot, R_EBP, cpu_T[0]);
5288        gen_pop_update(s, ot);
5289        break;
5290    case 0x06: /* push es */
5291    case 0x0e: /* push cs */
5292    case 0x16: /* push ss */
5293    case 0x1e: /* push ds */
5294        if (CODE64(s))
5295            goto illegal_op;
5296        gen_op_movl_T0_seg(b >> 3);
5297        gen_push_v(s, cpu_T[0]);
5298        break;
5299    case 0x1a0: /* push fs */
5300    case 0x1a8: /* push gs */
5301        gen_op_movl_T0_seg((b >> 3) & 7);
5302        gen_push_v(s, cpu_T[0]);
5303        break;
5304    case 0x07: /* pop es */
5305    case 0x17: /* pop ss */
5306    case 0x1f: /* pop ds */
5307        if (CODE64(s))
5308            goto illegal_op;
5309        reg = b >> 3;
5310        ot = gen_pop_T0(s);
5311        gen_movl_seg_T0(s, reg);
5312        gen_pop_update(s, ot);
5313        if (reg == R_SS) {
5314            /* If reg == SS, inhibit interrupts/trace.  When several
5315               consecutive instructions disable interrupts, only the
5316               first one takes effect.  */
5317            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
5318                gen_helper_set_inhibit_irq(cpu_env);
5319            s->tf = 0;
5320        }
5321        if (s->is_jmp) {
5322            gen_jmp_im(s->pc - s->cs_base);
5323            gen_eob(s);
5324        }
5325        break;
5326    case 0x1a1: /* pop fs */
5327    case 0x1a9: /* pop gs */
5328        ot = gen_pop_T0(s);
5329        gen_movl_seg_T0(s, (b >> 3) & 7);
5330        gen_pop_update(s, ot);
5331        if (s->is_jmp) {
5332            gen_jmp_im(s->pc - s->cs_base);
5333            gen_eob(s);
5334        }
5335        break;
5336
5337        /**************************/
5338        /* mov */
5339    case 0x88:
5340    case 0x89: /* mov Gv, Ev */
5341        ot = mo_b_d(b, dflag);
5342        modrm = cpu_ldub_code(env, s->pc++);
5343        reg = ((modrm >> 3) & 7) | rex_r;
5344
5345        /* generate a generic store */
5346        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5347        break;
5348    case 0xc6:
5349    case 0xc7: /* mov Ev, Iv */
5350        ot = mo_b_d(b, dflag);
5351        modrm = cpu_ldub_code(env, s->pc++);
5352        mod = (modrm >> 6) & 3;
5353        if (mod != 3) {
5354            s->rip_offset = insn_const_size(ot);
5355            gen_lea_modrm(env, s, modrm);
5356        }
5357        val = insn_get(env, s, ot);
5358        tcg_gen_movi_tl(cpu_T[0], val);
5359        if (mod != 3) {
5360            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5361        } else {
5362            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T[0]);
5363        }
5364        break;
5365    case 0x8a:
5366    case 0x8b: /* mov Ev, Gv */
5367        ot = mo_b_d(b, dflag);
5368        modrm = cpu_ldub_code(env, s->pc++);
5369        reg = ((modrm >> 3) & 7) | rex_r;
5370
5371        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5372        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
5373        break;
5374    case 0x8e: /* mov seg, Gv */
5375        modrm = cpu_ldub_code(env, s->pc++);
5376        reg = (modrm >> 3) & 7;
5377        if (reg >= 6 || reg == R_CS)
5378            goto illegal_op;
5379        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5380        gen_movl_seg_T0(s, reg);
5381        if (reg == R_SS) {
5382            /* If reg == SS, inhibit interrupts/trace.  When several
5383               consecutive instructions disable interrupts, only the
5384               first one takes effect.  */
5385            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
5386                gen_helper_set_inhibit_irq(cpu_env);
5387            s->tf = 0;
5388        }
5389        if (s->is_jmp) {
5390            gen_jmp_im(s->pc - s->cs_base);
5391            gen_eob(s);
5392        }
5393        break;
5394    case 0x8c: /* mov Gv, seg */
5395        modrm = cpu_ldub_code(env, s->pc++);
5396        reg = (modrm >> 3) & 7;
5397        mod = (modrm >> 6) & 3;
5398        if (reg >= 6)
5399            goto illegal_op;
5400        gen_op_movl_T0_seg(reg);
5401        ot = mod == 3 ? dflag : MO_16;
5402        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5403        break;
5404
5405    case 0x1b6: /* movzbS Gv, Eb */
5406    case 0x1b7: /* movzwS Gv, Eb */
5407    case 0x1be: /* movsbS Gv, Eb */
5408    case 0x1bf: /* movswS Gv, Eb */
5409        {
5410            TCGMemOp d_ot;
5411            TCGMemOp s_ot;
5412
5413            /* d_ot is the size of the destination */
5414            d_ot = dflag;
5415            /* ot is the size of the source */
5416            ot = (b & 1) + MO_8;
5417            /* s_ot is the sign+size of the source */
5418            s_ot = b & 8 ? MO_SIGN | ot : ot;
5419
5420            modrm = cpu_ldub_code(env, s->pc++);
5421            reg = ((modrm >> 3) & 7) | rex_r;
5422            mod = (modrm >> 6) & 3;
5423            rm = (modrm & 7) | REX_B(s);
5424
5425            if (mod == 3) {
5426                gen_op_mov_v_reg(ot, cpu_T[0], rm);
5427                switch (s_ot) {
5428                case MO_UB:
5429                    tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
5430                    break;
5431                case MO_SB:
5432                    tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
5433                    break;
5434                case MO_UW:
5435                    tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
5436                    break;
5437                default:
5438                case MO_SW:
5439                    tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5440                    break;
5441                }
5442                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
5443            } else {
5444                gen_lea_modrm(env, s, modrm);
5445                gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
5446                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
5447            }
5448        }
5449        break;
5450
5451    case 0x8d: /* lea */
5452        ot = dflag;
5453        modrm = cpu_ldub_code(env, s->pc++);
5454        mod = (modrm >> 6) & 3;
5455        if (mod == 3)
5456            goto illegal_op;
5457        reg = ((modrm >> 3) & 7) | rex_r;
5458        /* LEA computes the effective address only, so make sure no segment base is added */
5459        s->override = -1;
5460        val = s->addseg;
5461        s->addseg = 0;
5462        gen_lea_modrm(env, s, modrm);
5463        s->addseg = val;
5464        gen_op_mov_reg_v(ot, reg, cpu_A0);
5465        break;
5466
5467    case 0xa0: /* mov EAX, Ov */
5468    case 0xa1:
5469    case 0xa2: /* mov Ov, EAX */
5470    case 0xa3:
5471        {
5472            target_ulong offset_addr;
5473
5474            ot = mo_b_d(b, dflag);
5475            switch (s->aflag) {
5476#ifdef TARGET_X86_64
5477            case MO_64:
5478                offset_addr = cpu_ldq_code(env, s->pc);
5479                s->pc += 8;
5480                break;
5481#endif
5482            default:
5483                offset_addr = insn_get(env, s, s->aflag);
5484                break;
5485            }
5486            tcg_gen_movi_tl(cpu_A0, offset_addr);
5487            gen_add_A0_ds_seg(s);
5488            if ((b & 2) == 0) {
5489                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
5490                gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
5491            } else {
5492                gen_op_mov_v_reg(ot, cpu_T[0], R_EAX);
5493                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5494            }
5495        }
5496        break;
5497    case 0xd7: /* xlat */
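        /* AL = [(E/R)BX + zero-extended AL], with the sum truncated to the
           current address size and the DS base (or override) added.  */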
5498        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5499        tcg_gen_ext8u_tl(cpu_T[0], cpu_regs[R_EAX]);
5500        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
5501        gen_extu(s->aflag, cpu_A0);
5502        gen_add_A0_ds_seg(s);
5503        gen_op_ld_v(s, MO_8, cpu_T[0], cpu_A0);
5504        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
5505        break;
5506    case 0xb0 ... 0xb7: /* mov R, Ib */
5507        val = insn_get(env, s, MO_8);
5508        tcg_gen_movi_tl(cpu_T[0], val);
5509        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T[0]);
5510        break;
5511    case 0xb8 ... 0xbf: /* mov R, Iv */
5512#ifdef TARGET_X86_64
5513        if (dflag == MO_64) {
5514            uint64_t tmp;
5515            /* 64 bit case */
5516            tmp = cpu_ldq_code(env, s->pc);
5517            s->pc += 8;
5518            reg = (b & 7) | REX_B(s);
5519            tcg_gen_movi_tl(cpu_T[0], tmp);
5520            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
5521        } else
5522#endif
5523        {
5524            ot = dflag;
5525            val = insn_get(env, s, ot);
5526            reg = (b & 7) | REX_B(s);
5527            tcg_gen_movi_tl(cpu_T[0], val);
5528            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
5529        }
5530        break;
5531
5532    case 0x91 ... 0x97: /* xchg R, EAX */
5533    do_xchg_reg_eax:
5534        ot = dflag;
5535        reg = (b & 7) | REX_B(s);
5536        rm = R_EAX;
5537        goto do_xchg_reg;
5538    case 0x86:
5539    case 0x87: /* xchg Ev, Gv */
5540        ot = mo_b_d(b, dflag);
5541        modrm = cpu_ldub_code(env, s->pc++);
5542        reg = ((modrm >> 3) & 7) | rex_r;
5543        mod = (modrm >> 6) & 3;
5544        if (mod == 3) {
5545            rm = (modrm & 7) | REX_B(s);
5546        do_xchg_reg:
5547            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5548            gen_op_mov_v_reg(ot, cpu_T[1], rm);
5549            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
5550            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5551        } else {
5552            gen_lea_modrm(env, s, modrm);
5553            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5554            /* for xchg, lock is implicit */
5555            if (!(prefixes & PREFIX_LOCK))
5556                gen_helper_lock();
5557            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
5558            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5559            if (!(prefixes & PREFIX_LOCK))
5560                gen_helper_unlock();
5561            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5562        }
5563        break;
5564    case 0xc4: /* les Gv */
5565        /* In CODE64 this is VEX3; see above.  */
5566        op = R_ES;
5567        goto do_lxx;
5568    case 0xc5: /* lds Gv */
5569        /* In CODE64 this is VEX2; see above.  */
5570        op = R_DS;
5571        goto do_lxx;
5572    case 0x1b2: /* lss Gv */
5573        op = R_SS;
5574        goto do_lxx;
5575    case 0x1b4: /* lfs Gv */
5576        op = R_FS;
5577        goto do_lxx;
5578    case 0x1b5: /* lgs Gv */
5579        op = R_GS;
5580    do_lxx:
5581        ot = dflag != MO_16 ? MO_32 : MO_16;
5582        modrm = cpu_ldub_code(env, s->pc++);
5583        reg = ((modrm >> 3) & 7) | rex_r;
5584        mod = (modrm >> 6) & 3;
5585        if (mod == 3)
5586            goto illegal_op;
5587        gen_lea_modrm(env, s, modrm);
5588        gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
5589        gen_add_A0_im(s, 1 << ot);
5590        /* load the segment first to handle exceptions properly */
5591        gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
5592        gen_movl_seg_T0(s, op);
5593        /* then put the data */
5594        gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5595        if (s->is_jmp) {
5596            gen_jmp_im(s->pc - s->cs_base);
5597            gen_eob(s);
5598        }
5599        break;
5600
5601        /************************/
5602        /* shifts */
5603    case 0xc0:
5604    case 0xc1:
5605        /* shift Ev,Ib */
5606        shift = 2;
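        /* shift encodes the count source: 2 = immediate byte follows,
           1 = constant 1, 0 = CL register.  */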
5607    grp2:
5608        {
5609            ot = mo_b_d(b, dflag);
5610            modrm = cpu_ldub_code(env, s->pc++);
5611            mod = (modrm >> 6) & 3;
5612            op = (modrm >> 3) & 7;
5613
5614            if (mod != 3) {
5615                if (shift == 2) {
5616                    s->rip_offset = 1;
5617                }
5618                gen_lea_modrm(env, s, modrm);
5619                opreg = OR_TMP0;
5620            } else {
5621                opreg = (modrm & 7) | REX_B(s);
5622            }
5623
5624            /* simpler op */
5625            if (shift == 0) {
5626                gen_shift(s, op, ot, opreg, OR_ECX);
5627            } else {
5628                if (shift == 2) {
5629                    shift = cpu_ldub_code(env, s->pc++);
5630                }
5631                gen_shifti(s, op, ot, opreg, shift);
5632            }
5633        }
5634        break;
5635    case 0xd0:
5636    case 0xd1:
5637        /* shift Ev,1 */
5638        shift = 1;
5639        goto grp2;
5640    case 0xd2:
5641    case 0xd3:
5642        /* shift Ev,cl */
5643        shift = 0;
5644        goto grp2;
5645
5646    case 0x1a4: /* shld imm */
5647        op = 0;
5648        shift = 1;
5649        goto do_shiftd;
5650    case 0x1a5: /* shld cl */
5651        op = 0;
5652        shift = 0;
5653        goto do_shiftd;
5654    case 0x1ac: /* shrd imm */
5655        op = 1;
5656        shift = 1;
5657        goto do_shiftd;
5658    case 0x1ad: /* shrd cl */
5659        op = 1;
5660        shift = 0;
5661    do_shiftd:
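        /* op selects the direction (0 = shld, 1 = shrd); shift selects the
           count source (1 = immediate byte, 0 = CL).  */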
5662        ot = dflag;
5663        modrm = cpu_ldub_code(env, s->pc++);
5664        mod = (modrm >> 6) & 3;
5665        rm = (modrm & 7) | REX_B(s);
5666        reg = ((modrm >> 3) & 7) | rex_r;
5667        if (mod != 3) {
5668            gen_lea_modrm(env, s, modrm);
5669            opreg = OR_TMP0;
5670        } else {
5671            opreg = rm;
5672        }
5673        gen_op_mov_v_reg(ot, cpu_T[1], reg);
5674
5675        if (shift) {
5676            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5677            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5678            tcg_temp_free(imm);
5679        } else {
5680            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5681        }
5682        break;
5683
5684        /************************/
5685        /* floats */
5686    case 0xd8 ... 0xdf:
5687        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5688            /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5689            /* XXX: what should be done on an illegal op? */
5690            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5691            break;
5692        }
5693        modrm = cpu_ldub_code(env, s->pc++);
5694        mod = (modrm >> 6) & 3;
5695        rm = modrm & 7;
5696        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
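        /* op is a 6-bit x87 operation index: bits [5:3] come from the
           opcode byte (0xd8..0xdf) and bits [2:0] from the ModRM reg
           field.  */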
5697        if (mod != 3) {
5698            /* memory op */
5699            gen_lea_modrm(env, s, modrm);
5700            switch(op) {
5701            case 0x00 ... 0x07: /* fxxxs */
5702            case 0x10 ... 0x17: /* fixxxl */
5703            case 0x20 ... 0x27: /* fxxxl */
5704            case 0x30 ... 0x37: /* fixxx */
5705                {
5706                    int op1;
5707                    op1 = op & 7;
5708
5709                    switch(op >> 4) {
5710                    case 0:
5711                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5712                                            s->mem_index, MO_LEUL);
5713                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5714                        break;
5715                    case 1:
5716                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5717                                            s->mem_index, MO_LEUL);
5718                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5719                        break;
5720                    case 2:
5721                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5722                                            s->mem_index, MO_LEQ);
5723                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5724                        break;
5725                    case 3:
5726                    default:
5727                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5728                                            s->mem_index, MO_LESW);
5729                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5730                        break;
5731                    }
5732
5733                    gen_helper_fp_arith_ST0_FT0(op1);
5734                    if (op1 == 3) {
5735                        /* fcomp needs pop */
5736                        gen_helper_fpop(cpu_env);
5737                    }
5738                }
5739                break;
5740            case 0x08: /* flds */
5741            case 0x0a: /* fsts */
5742            case 0x0b: /* fstps */
5743            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5744            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5745            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5746                switch(op & 7) {
5747                case 0:
5748                    switch(op >> 4) {
5749                    case 0:
5750                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5751                                            s->mem_index, MO_LEUL);
5752                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5753                        break;
5754                    case 1:
5755                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5756                                            s->mem_index, MO_LEUL);
5757                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5758                        break;
5759                    case 2:
5760                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5761                                            s->mem_index, MO_LEQ);
5762                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5763                        break;
5764                    case 3:
5765                    default:
5766                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5767                                            s->mem_index, MO_LESW);
5768                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5769                        break;
5770                    }
5771                    break;
5772                case 1:
5773                    /* XXX: the corresponding CPUID bit (SSE3, for fisttp) must be tested! */
5774                    switch(op >> 4) {
5775                    case 1:
5776                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5777                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5778                                            s->mem_index, MO_LEUL);
5779                        break;
5780                    case 2:
5781                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5782                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5783                                            s->mem_index, MO_LEQ);
5784                        break;
5785                    case 3:
5786                    default:
5787                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5788                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5789                                            s->mem_index, MO_LEUW);
5790                        break;
5791                    }
5792                    gen_helper_fpop(cpu_env);
5793                    break;
5794                default:
5795                    switch(op >> 4) {
5796                    case 0:
5797                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5798                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5799                                            s->mem_index, MO_LEUL);
5800                        break;
5801                    case 1:
5802                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5803                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5804                                            s->mem_index, MO_LEUL);
5805                        break;
5806                    case 2:
5807                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5808                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5809                                            s->mem_index, MO_LEQ);
5810                        break;
5811                    case 3:
5812                    default:
5813                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5814                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5815                                            s->mem_index, MO_LEUW);
5816                        break;
5817                    }
5818                    if ((op & 7) == 3)
5819                        gen_helper_fpop(cpu_env);
5820                    break;
5821                }
5822                break;
5823            case 0x0c: /* fldenv mem */
5824                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5825                break;
5826            case 0x0d: /* fldcw mem */
5827                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5828                                    s->mem_index, MO_LEUW);
5829                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5830                break;
5831            case 0x0e: /* fnstenv mem */
5832                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5833                break;
5834            case 0x0f: /* fnstcw mem */
5835                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5836                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5837                                    s->mem_index, MO_LEUW);
5838                break;
5839            case 0x1d: /* fldt mem */
5840                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5841                break;
5842            case 0x1f: /* fstpt mem */
5843                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5844                gen_helper_fpop(cpu_env);
5845                break;
5846            case 0x2c: /* frstor mem */
5847                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5848                break;
5849            case 0x2e: /* fnsave mem */
5850                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5851                break;
5852            case 0x2f: /* fnstsw mem */
5853                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5854                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5855                                    s->mem_index, MO_LEUW);
5856                break;
5857            case 0x3c: /* fbld */
5858                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5859                break;
5860            case 0x3e: /* fbstp */
5861                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5862                gen_helper_fpop(cpu_env);
5863                break;
5864            case 0x3d: /* fildll */
5865                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5866                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5867                break;
5868            case 0x3f: /* fistpll */
5869                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5870                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5871                gen_helper_fpop(cpu_env);
5872                break;
5873            default:
5874                goto illegal_op;
5875            }
5876        } else {
5877            /* register float ops */
5878            opreg = rm;
5879
5880            switch(op) {
5881            case 0x08: /* fld sti */
5882                gen_helper_fpush(cpu_env);
5883                gen_helper_fmov_ST0_STN(cpu_env,
5884                                        tcg_const_i32((opreg + 1) & 7));
5885                break;
5886            case 0x09: /* fxchg sti */
5887            case 0x29: /* fxchg4 sti, undocumented op */
5888            case 0x39: /* fxchg7 sti, undocumented op */
5889                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5890                break;
5891            case 0x0a: /* grp d9/2 */
5892                switch(rm) {
5893                case 0: /* fnop */
5894                    /* check exceptions (FreeBSD FPU probe) */
5895                    gen_helper_fwait(cpu_env);
5896                    break;
5897                default:
5898                    goto illegal_op;
5899                }
5900                break;
5901            case 0x0c: /* grp d9/4 */
5902                switch(rm) {
5903                case 0: /* fchs */
5904                    gen_helper_fchs_ST0(cpu_env);
5905                    break;
5906                case 1: /* fabs */
5907                    gen_helper_fabs_ST0(cpu_env);
5908                    break;
5909                case 4: /* ftst */
5910                    gen_helper_fldz_FT0(cpu_env);
5911                    gen_helper_fcom_ST0_FT0(cpu_env);
5912                    break;
5913                case 5: /* fxam */
5914                    gen_helper_fxam_ST0(cpu_env);
5915                    break;
5916                default:
5917                    goto illegal_op;
5918                }
5919                break;
5920            case 0x0d: /* grp d9/5 */
5921                {
5922                    switch(rm) {
5923                    case 0:
5924                        gen_helper_fpush(cpu_env);
5925                        gen_helper_fld1_ST0(cpu_env);
5926                        break;
5927                    case 1:
5928                        gen_helper_fpush(cpu_env);
5929                        gen_helper_fldl2t_ST0(cpu_env);
5930                        break;
5931                    case 2:
5932                        gen_helper_fpush(cpu_env);
5933                        gen_helper_fldl2e_ST0(cpu_env);
5934                        break;
5935                    case 3:
5936                        gen_helper_fpush(cpu_env);
5937                        gen_helper_fldpi_ST0(cpu_env);
5938                        break;
5939                    case 4:
5940                        gen_helper_fpush(cpu_env);
5941                        gen_helper_fldlg2_ST0(cpu_env);
5942                        break;
5943                    case 5:
5944                        gen_helper_fpush(cpu_env);
5945                        gen_helper_fldln2_ST0(cpu_env);
5946                        break;
5947                    case 6:
5948                        gen_helper_fpush(cpu_env);
5949                        gen_helper_fldz_ST0(cpu_env);
5950                        break;
5951                    default:
5952                        goto illegal_op;
5953                    }
5954                }
5955                break;
5956            case 0x0e: /* grp d9/6 */
5957                switch(rm) {
5958                case 0: /* f2xm1 */
5959                    gen_helper_f2xm1(cpu_env);
5960                    break;
5961                case 1: /* fyl2x */
5962                    gen_helper_fyl2x(cpu_env);
5963                    break;
5964                case 2: /* fptan */
5965                    gen_helper_fptan(cpu_env);
5966                    break;
5967                case 3: /* fpatan */
5968                    gen_helper_fpatan(cpu_env);
5969                    break;
5970                case 4: /* fxtract */
5971                    gen_helper_fxtract(cpu_env);
5972                    break;
5973                case 5: /* fprem1 */
5974                    gen_helper_fprem1(cpu_env);
5975                    break;
5976                case 6: /* fdecstp */
5977                    gen_helper_fdecstp(cpu_env);
5978                    break;
5979                default:
5980                case 7: /* fincstp */
5981                    gen_helper_fincstp(cpu_env);
5982                    break;
5983                }
5984                break;
5985            case 0x0f: /* grp d9/7 */
5986                switch(rm) {
5987                case 0: /* fprem */
5988                    gen_helper_fprem(cpu_env);
5989                    break;
5990                case 1: /* fyl2xp1 */
5991                    gen_helper_fyl2xp1(cpu_env);
5992                    break;
5993                case 2: /* fsqrt */
5994                    gen_helper_fsqrt(cpu_env);
5995                    break;
5996                case 3: /* fsincos */
5997                    gen_helper_fsincos(cpu_env);
5998                    break;
5999                case 5: /* fscale */
6000                    gen_helper_fscale(cpu_env);
6001                    break;
6002                case 4: /* frndint */
6003                    gen_helper_frndint(cpu_env);
6004                    break;
6005                case 6: /* fsin */
6006                    gen_helper_fsin(cpu_env);
6007                    break;
6008                default:
6009                case 7: /* fcos */
6010                    gen_helper_fcos(cpu_env);
6011                    break;
6012                }
6013                break;
6014            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6015            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6016            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6017                {
6018                    int op1;
6019
6020                    op1 = op & 7;
6021                    if (op >= 0x20) {
6022                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6023                        if (op >= 0x30)
6024                            gen_helper_fpop(cpu_env);
6025                    } else {
6026                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6027                        gen_helper_fp_arith_ST0_FT0(op1);
6028                    }
6029                }
6030                break;
6031            case 0x02: /* fcom */
6032            case 0x22: /* fcom2, undocumented op */
6033                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6034                gen_helper_fcom_ST0_FT0(cpu_env);
6035                break;
6036            case 0x03: /* fcomp */
6037            case 0x23: /* fcomp3, undocumented op */
6038            case 0x32: /* fcomp5, undocumented op */
6039                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6040                gen_helper_fcom_ST0_FT0(cpu_env);
6041                gen_helper_fpop(cpu_env);
6042                break;
6043            case 0x15: /* da/5 */
6044                switch(rm) {
6045                case 1: /* fucompp */
6046                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6047                    gen_helper_fucom_ST0_FT0(cpu_env);
6048                    gen_helper_fpop(cpu_env);
6049                    gen_helper_fpop(cpu_env);
6050                    break;
6051                default:
6052                    goto illegal_op;
6053                }
6054                break;
6055            case 0x1c:
6056                switch(rm) {
6057                case 0: /* feni (287 only, just do nop here) */
6058                    break;
6059                case 1: /* fdisi (287 only, just do nop here) */
6060                    break;
6061                case 2: /* fclex */
6062                    gen_helper_fclex(cpu_env);
6063                    break;
6064                case 3: /* fninit */
6065                    gen_helper_fninit(cpu_env);
6066                    break;
6067                case 4: /* fsetpm (287 only, just do nop here) */
6068                    break;
6069                default:
6070                    goto illegal_op;
6071                }
6072                break;
6073            case 0x1d: /* fucomi */
6074                if (!(s->cpuid_features & CPUID_CMOV)) {
6075                    goto illegal_op;
6076                }
6077                gen_update_cc_op(s);
6078                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6079                gen_helper_fucomi_ST0_FT0(cpu_env);
6080                set_cc_op(s, CC_OP_EFLAGS);
6081                break;
6082            case 0x1e: /* fcomi */
6083                if (!(s->cpuid_features & CPUID_CMOV)) {
6084                    goto illegal_op;
6085                }
6086                gen_update_cc_op(s);
6087                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6088                gen_helper_fcomi_ST0_FT0(cpu_env);
6089                set_cc_op(s, CC_OP_EFLAGS);
6090                break;
6091            case 0x28: /* ffree sti */
6092                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6093                break;
6094            case 0x2a: /* fst sti */
6095                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6096                break;
6097            case 0x2b: /* fstp sti */
6098            case 0x0b: /* fstp1 sti, undocumented op */
6099            case 0x3a: /* fstp8 sti, undocumented op */
6100            case 0x3b: /* fstp9 sti, undocumented op */
6101                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6102                gen_helper_fpop(cpu_env);
6103                break;
6104            case 0x2c: /* fucom st(i) */
6105                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6106                gen_helper_fucom_ST0_FT0(cpu_env);
6107                break;
6108            case 0x2d: /* fucomp st(i) */
6109                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6110                gen_helper_fucom_ST0_FT0(cpu_env);
6111                gen_helper_fpop(cpu_env);
6112                break;
6113            case 0x33: /* de/3 */
6114                switch(rm) {
6115                case 1: /* fcompp */
6116                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6117                    gen_helper_fcom_ST0_FT0(cpu_env);
6118                    gen_helper_fpop(cpu_env);
6119                    gen_helper_fpop(cpu_env);
6120                    break;
6121                default:
6122                    goto illegal_op;
6123                }
6124                break;
6125            case 0x38: /* ffreep sti, undocumented op */
6126                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6127                gen_helper_fpop(cpu_env);
6128                break;
6129            case 0x3c: /* df/4 */
6130                switch(rm) {
6131                case 0: /* fnstsw ax */
6132                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6133                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
6134                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
6135                    break;
6136                default:
6137                    goto illegal_op;
6138                }
6139                break;
6140            case 0x3d: /* fucomip */
6141                if (!(s->cpuid_features & CPUID_CMOV)) {
6142                    goto illegal_op;
6143                }
6144                gen_update_cc_op(s);
6145                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6146                gen_helper_fucomi_ST0_FT0(cpu_env);
6147                gen_helper_fpop(cpu_env);
6148                set_cc_op(s, CC_OP_EFLAGS);
6149                break;
6150            case 0x3e: /* fcomip */
6151                if (!(s->cpuid_features & CPUID_CMOV)) {
6152                    goto illegal_op;
6153                }
6154                gen_update_cc_op(s);
6155                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6156                gen_helper_fcomi_ST0_FT0(cpu_env);
6157                gen_helper_fpop(cpu_env);
6158                set_cc_op(s, CC_OP_EFLAGS);
6159                break;
6160            case 0x10 ... 0x13: /* fcmovxx */
6161            case 0x18 ... 0x1b:
6162                {
6163                    int op1;
6164                    TCGLabel *l1;
6165                    static const uint8_t fcmov_cc[8] = {
6166                        (JCC_B << 1),
6167                        (JCC_Z << 1),
6168                        (JCC_BE << 1),
6169                        (JCC_P << 1),
6170                    };
6171
6172                    if (!(s->cpuid_features & CPUID_CMOV)) {
6173                        goto illegal_op;
6174                    }
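                        /* The low two opcode bits pick the base condition
                           from fcmov_cc; opcode bit 3 selects the negated
                           form (the 0x18 group).  jcc condition codes carry
                           negation in bit 0, hence the "^ 1": we branch
                           around the fmov below when the move condition
                           does NOT hold. */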
6175                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6176                    l1 = gen_new_label();
6177                    gen_jcc1_noeob(s, op1, l1);
6178                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6179                    gen_set_label(l1);
6180                }
6181                break;
6182            default:
6183                goto illegal_op;
6184            }
6185        }
6186        break;
6187        /************************/
6188        /* string ops */
6189
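            /* the repz variants take the current and next EIP so the
               generated loop can re-execute the insn while CX/ECX counts
               down */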
6190    case 0xa4: /* movsS */
6191    case 0xa5:
6192        ot = mo_b_d(b, dflag);
6193        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6194            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6195        } else {
6196            gen_movs(s, ot);
6197        }
6198        break;
6199
6200    case 0xaa: /* stosS */
6201    case 0xab:
6202        ot = mo_b_d(b, dflag);
6203        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6204            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6205        } else {
6206            gen_stos(s, ot);
6207        }
6208        break;
6209    case 0xac: /* lodsS */
6210    case 0xad:
6211        ot = mo_b_d(b, dflag);
6212        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6213            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6214        } else {
6215            gen_lods(s, ot);
6216        }
6217        break;
6218    case 0xae: /* scasS */
6219    case 0xaf:
6220        ot = mo_b_d(b, dflag);
6221        if (prefixes & PREFIX_REPNZ) {
6222            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6223        } else if (prefixes & PREFIX_REPZ) {
6224            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6225        } else {
6226            gen_scas(s, ot);
6227        }
6228        break;
6229
6230    case 0xa6: /* cmpsS */
6231    case 0xa7:
6232        ot = mo_b_d(b, dflag);
6233        if (prefixes & PREFIX_REPNZ) {
6234            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6235        } else if (prefixes & PREFIX_REPZ) {
6236            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6237        } else {
6238            gen_cmps(s, ot);
6239        }
6240        break;
6241    case 0x6c: /* insS */
6242    case 0x6d:
6243        ot = mo_b_d32(b, dflag);
6244        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6245        gen_check_io(s, ot, pc_start - s->cs_base, 
6246                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
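            /* the literal 4 is the string-operation bit (SVM_IOIO_STR_MASK)
               of the SVM IOIO exit information */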
6247        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6248            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6249        } else {
6250            gen_ins(s, ot);
6251            if (s->tb->cflags & CF_USE_ICOUNT) {
6252                gen_jmp(s, s->pc - s->cs_base);
6253            }
6254        }
6255        break;
6256    case 0x6e: /* outsS */
6257    case 0x6f:
6258        ot = mo_b_d32(b, dflag);
6259        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6260        gen_check_io(s, ot, pc_start - s->cs_base,
6261                     svm_is_rep(prefixes) | 4);
6262        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6263            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6264        } else {
6265            gen_outs(s, ot);
6266            if (s->tb->cflags & CF_USE_ICOUNT) {
6267                gen_jmp(s, s->pc - s->cs_base);
6268            }
6269        }
6270        break;
6271
6272        /************************/
6273        /* port I/O */
6274
6275    case 0xe4:
6276    case 0xe5:
6277        ot = mo_b_d32(b, dflag);
6278        val = cpu_ldub_code(env, s->pc++);
6279        tcg_gen_movi_tl(cpu_T[0], val);
6280        gen_check_io(s, ot, pc_start - s->cs_base,
6281                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6282        if (s->tb->cflags & CF_USE_ICOUNT) {
6283            gen_io_start();
6284        }
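            /* with icount, the I/O helper must be bracketed by
               gen_io_start/gen_io_end, and the TB is ended below so the
               access lands on an exact instruction boundary */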
6285        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6286        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
6287        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
6288        gen_bpt_io(s, cpu_tmp2_i32, ot);
6289        if (s->tb->cflags & CF_USE_ICOUNT) {
6290            gen_io_end();
6291            gen_jmp(s, s->pc - s->cs_base);
6292        }
6293        break;
6294    case 0xe6:
6295    case 0xe7:
6296        ot = mo_b_d32(b, dflag);
6297        val = cpu_ldub_code(env, s->pc++);
6298        tcg_gen_movi_tl(cpu_T[0], val);
6299        gen_check_io(s, ot, pc_start - s->cs_base,
6300                     svm_is_rep(prefixes));
6301        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
6302
6303        if (s->tb->cflags & CF_USE_ICOUNT) {
6304            gen_io_start();
6305        }
6306        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6307        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
6308        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6309        gen_bpt_io(s, cpu_tmp2_i32, ot);
6310        if (s->tb->cflags & CF_USE_ICOUNT) {
6311            gen_io_end();
6312            gen_jmp(s, s->pc - s->cs_base);
6313        }
6314        break;
6315    case 0xec:
6316    case 0xed:
6317        ot = mo_b_d32(b, dflag);
6318        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6319        gen_check_io(s, ot, pc_start - s->cs_base,
6320                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6321        if (s->tb->cflags & CF_USE_ICOUNT) {
6322            gen_io_start();
6323        }
6324        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
6325        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
6326        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
6327        gen_bpt_io(s, cpu_tmp2_i32, ot);
6328        if (s->tb->cflags & CF_USE_ICOUNT) {
6329            gen_io_end();
6330            gen_jmp(s, s->pc - s->cs_base);
6331        }
6332        break;
6333    case 0xee:
6334    case 0xef:
6335        ot = mo_b_d32(b, dflag);
6336        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6337        gen_check_io(s, ot, pc_start - s->cs_base,
6338                     svm_is_rep(prefixes));
6339        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
6340
6341        if (s->tb->cflags & CF_USE_ICOUNT) {
6342            gen_io_start();
6343        }
6344        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
6345        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
6346        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6347        gen_bpt_io(s, cpu_tmp2_i32, ot);
6348        if (s->tb->cflags & CF_USE_ICOUNT) {
6349            gen_io_end();
6350            gen_jmp(s, s->pc - s->cs_base);
6351        }
6352        break;
6353
6354        /************************/
6355        /* control */
6356    case 0xc2: /* ret im */
6357        val = cpu_ldsw_code(env, s->pc);
6358        s->pc += 2;
6359        ot = gen_pop_T0(s);
6360        gen_stack_update(s, val + (1 << ot));
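            /* one SP adjustment covers both the (1 << ot)-byte return
               address loaded above and the val immediate bytes */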
6361        /* Note that gen_pop_T0 uses a zero-extending load.  */
6362        gen_op_jmp_v(cpu_T[0]);
6363        gen_eob(s);
6364        break;
6365    case 0xc3: /* ret */
6366        ot = gen_pop_T0(s);
6367        gen_pop_update(s, ot);
6368        /* Note that gen_pop_T0 uses a zero-extending load.  */
6369        gen_op_jmp_v(cpu_T[0]);
6370        gen_eob(s);
6371        break;
6372    case 0xca: /* lret im */
6373        val = cpu_ldsw_code(env, s->pc);
6374        s->pc += 2;
6375    do_lret:
6376        if (s->pe && !s->vm86) {
6377            gen_update_cc_op(s);
6378            gen_jmp_im(pc_start - s->cs_base);
6379            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6380                                      tcg_const_i32(val));
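                /* dflag - 1 maps MO_16/MO_32/MO_64 onto the 0/1/2
                   operand-size argument expected by the helper */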
6381        } else {
6382            gen_stack_A0(s);
6383            /* pop offset */
6384            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
6385            /* NOTE: updating EIP before popping the selector is safe
6386               even if the load below raises an exception */
6387            gen_op_jmp_v(cpu_T[0]);
6388            /* pop selector */
6389            gen_op_addl_A0_im(1 << dflag);
6390            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
6391            gen_op_movl_seg_T0_vm(R_CS);
6392            /* add stack offset */
6393            gen_stack_update(s, val + (2 << dflag));
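                /* two (1 << dflag)-byte pops (offset and selector) plus
                   the immediate */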
6394        }
6395        gen_eob(s);
6396        break;
6397    case 0xcb: /* lret */
6398        val = 0;
6399        goto do_lret;
6400    case 0xcf: /* iret */
6401        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6402        if (!s->pe) {
6403            /* real mode */
6404            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6405            set_cc_op(s, CC_OP_EFLAGS);
6406        } else if (s->vm86) {
6407            if (s->iopl != 3) {
6408                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6409            } else {
6410                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6411                set_cc_op(s, CC_OP_EFLAGS);
6412            }
6413        } else {
6414            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6415                                      tcg_const_i32(s->pc - s->cs_base));
6416            set_cc_op(s, CC_OP_EFLAGS);
6417        }
6418        gen_eob(s);
6419        break;
6420    case 0xe8: /* call im */
6421        {
6422            if (dflag != MO_16) {
6423                tval = (int32_t)insn_get(env, s, MO_32);
6424            } else {
6425                tval = (int16_t)insn_get(env, s, MO_16);
6426            }
6427            next_eip = s->pc - s->cs_base;
6428            tval += next_eip;
6429            if (dflag == MO_16) {
6430                tval &= 0xffff;
6431            } else if (!CODE64(s)) {
6432                tval &= 0xffffffff;
6433            }
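                /* with a 16-bit operand size EIP wraps at 64K; outside
                   long mode the target is truncated to 32 bits */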
6434            tcg_gen_movi_tl(cpu_T[0], next_eip);
6435            gen_push_v(s, cpu_T[0]);
6436            gen_jmp(s, tval);
6437        }
6438        break;
6439    case 0x9a: /* lcall im */
6440        {
6441            unsigned int selector, offset;
6442
6443            if (CODE64(s))
6444                goto illegal_op;
6445            ot = dflag;
6446            offset = insn_get(env, s, ot);
6447            selector = insn_get(env, s, MO_16);
6448
6449            tcg_gen_movi_tl(cpu_T[0], selector);
6450            tcg_gen_movi_tl(cpu_T[1], offset);
6451        }
6452        goto do_lcall;
6453    case 0xe9: /* jmp im */
6454        if (dflag != MO_16) {
6455            tval = (int32_t)insn_get(env, s, MO_32);
6456        } else {
6457            tval = (int16_t)insn_get(env, s, MO_16);
6458        }
6459        tval += s->pc - s->cs_base;
6460        if (dflag == MO_16) {
6461            tval &= 0xffff;
6462        } else if (!CODE64(s)) {
6463            tval &= 0xffffffff;
6464        }
6465        gen_jmp(s, tval);
6466        break;
6467    case 0xea: /* ljmp im */
6468        {
6469            unsigned int selector, offset;
6470
6471            if (CODE64(s))
6472                goto illegal_op;
6473            ot = dflag;
6474            offset = insn_get(env, s, ot);
6475            selector = insn_get(env, s, MO_16);
6476
6477            tcg_gen_movi_tl(cpu_T[0], selector);
6478            tcg_gen_movi_tl(cpu_T[1], offset);
6479        }
6480        goto do_ljmp;
6481    case 0xeb: /* jmp Jb */
6482        tval = (int8_t)insn_get(env, s, MO_8);
6483        tval += s->pc - s->cs_base;
6484        if (dflag == MO_16) {
6485            tval &= 0xffff;
6486        }
6487        gen_jmp(s, tval);
6488        break;
6489    case 0x70 ... 0x7f: /* jcc Jb */
6490        tval = (int8_t)insn_get(env, s, MO_8);
6491        goto do_jcc;
6492    case 0x180 ... 0x18f: /* jcc Jv */
6493        if (dflag != MO_16) {
6494            tval = (int32_t)insn_get(env, s, MO_32);
6495        } else {
6496            tval = (int16_t)insn_get(env, s, MO_16);
6497        }
6498    do_jcc:
6499        next_eip = s->pc - s->cs_base;
6500        tval += next_eip;
6501        if (dflag == MO_16) {
6502            tval &= 0xffff;
6503        }
6504        gen_jcc(s, b, tval, next_eip);
6505        break;
6506
6507    case 0x190 ... 0x19f: /* setcc Gv */
6508        modrm = cpu_ldub_code(env, s->pc++);
6509        gen_setcc1(s, b, cpu_T[0]);
6510        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6511        break;
6512    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6513        if (!(s->cpuid_features & CPUID_CMOV)) {
6514            goto illegal_op;
6515        }
6516        ot = dflag;
6517        modrm = cpu_ldub_code(env, s->pc++);
6518        reg = ((modrm >> 3) & 7) | rex_r;
6519        gen_cmovcc1(env, s, ot, b, modrm, reg);
6520        break;
6521
6522        /************************/
6523        /* flags */
6524    case 0x9c: /* pushf */
6525        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6526        if (s->vm86 && s->iopl != 3) {
6527            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6528        } else {
6529            gen_update_cc_op(s);
6530            gen_helper_read_eflags(cpu_T[0], cpu_env);
6531            gen_push_v(s, cpu_T[0]);
6532        }
6533        break;
6534    case 0x9d: /* popf */
6535        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6536        if (s->vm86 && s->iopl != 3) {
6537            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6538        } else {
6539            ot = gen_pop_T0(s);
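                /* which EFLAGS bits POPF may change depends on privilege:
                   CPL 0 may also write IF and IOPL, CPL <= IOPL may write
                   IF but not IOPL, and otherwise neither; TF/AC/ID/NT are
                   always writable here, and a 16-bit operand leaves the
                   high word alone */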
6540            if (s->cpl == 0) {
6541                if (dflag != MO_16) {
6542                    gen_helper_write_eflags(cpu_env, cpu_T[0],
6543                                            tcg_const_i32((TF_MASK | AC_MASK |
6544                                                           ID_MASK | NT_MASK |
6545                                                           IF_MASK |
6546                                                           IOPL_MASK)));
6547                } else {
6548                    gen_helper_write_eflags(cpu_env, cpu_T[0],
6549                                            tcg_const_i32((TF_MASK | AC_MASK |
6550                                                           ID_MASK | NT_MASK |
6551                                                           IF_MASK | IOPL_MASK)
6552                                                          & 0xffff));
6553                }
6554            } else {
6555                if (s->cpl <= s->iopl) {
6556                    if (dflag != MO_16) {
6557                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6558                                                tcg_const_i32((TF_MASK |
6559                                                               AC_MASK |
6560                                                               ID_MASK |
6561                                                               NT_MASK |
6562                                                               IF_MASK)));
6563                    } else {
6564                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6565                                                tcg_const_i32((TF_MASK |
6566                                                               AC_MASK |
6567                                                               ID_MASK |
6568                                                               NT_MASK |
6569                                                               IF_MASK)
6570                                                              & 0xffff));
6571                    }
6572                } else {
6573                    if (dflag != MO_16) {
6574                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6575                                           tcg_const_i32((TF_MASK | AC_MASK |
6576                                                          ID_MASK | NT_MASK)));
6577                    } else {
6578                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6579                                           tcg_const_i32((TF_MASK | AC_MASK |
6580                                                          ID_MASK | NT_MASK)
6581                                                         & 0xffff));
6582                    }
6583                }
6584            }
6585            gen_pop_update(s, ot);
6586            set_cc_op(s, CC_OP_EFLAGS);
6587            /* abort translation because the TF/AC flags may have changed */
6588            gen_jmp_im(s->pc - s->cs_base);
6589            gen_eob(s);
6590        }
6591        break;
6592    case 0x9e: /* sahf */
6593        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6594            goto illegal_op;
6595        gen_op_mov_v_reg(MO_8, cpu_T[0], R_AH);
6596        gen_compute_eflags(s);
6597        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6598        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
6599        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
6600        break;
6601    case 0x9f: /* lahf */
6602        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6603            goto illegal_op;
6604        gen_compute_eflags(s);
6605        /* Note: gen_compute_eflags() only gives the condition codes */
6606        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
6607        gen_op_mov_reg_v(MO_8, R_AH, cpu_T[0]);
6608        break;
6609    case 0xf5: /* cmc */
6610        gen_compute_eflags(s);
6611        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6612        break;
6613    case 0xf8: /* clc */
6614        gen_compute_eflags(s);
6615        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6616        break;
6617    case 0xf9: /* stc */
6618        gen_compute_eflags(s);
6619        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6620        break;
6621    case 0xfc: /* cld */
6622        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6623        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6624        break;
6625    case 0xfd: /* std */
6626        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6627        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6628        break;
6629
6630        /************************/
6631        /* bit operations */
6632    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6633        ot = dflag;
6634        modrm = cpu_ldub_code(env, s->pc++);
6635        op = (modrm >> 3) & 7;
6636        mod = (modrm >> 6) & 3;
6637        rm = (modrm & 7) | REX_B(s);
6638        if (mod != 3) {
6639            s->rip_offset = 1;
6640            gen_lea_modrm(env, s, modrm);
6641            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
6642        } else {
6643            gen_op_mov_v_reg(ot, cpu_T[0], rm);
6644        }
6645        /* load shift */
6646        val = cpu_ldub_code(env, s->pc++);
6647        tcg_gen_movi_tl(cpu_T[1], val);
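            /* group 8 (0f ba) defines only /4 bt, /5 bts, /6 btr and
               /7 btc; rebase op to 0..3 to share the bt_op code below */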
6648        if (op < 4)
6649            goto illegal_op;
6650        op -= 4;
6651        goto bt_op;
6652    case 0x1a3: /* bt Gv, Ev */
6653        op = 0;
6654        goto do_btx;
6655    case 0x1ab: /* bts */
6656        op = 1;
6657        goto do_btx;
6658    case 0x1b3: /* btr */
6659        op = 2;
6660        goto do_btx;
6661    case 0x1bb: /* btc */
6662        op = 3;
6663    do_btx:
6664        ot = dflag;
6665        modrm = cpu_ldub_code(env, s->pc++);
6666        reg = ((modrm >> 3) & 7) | rex_r;
6667        mod = (modrm >> 6) & 3;
6668        rm = (modrm & 7) | REX_B(s);
6669        gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
6670        if (mod != 3) {
6671            gen_lea_modrm(env, s, modrm);
6672            /* memory operand: the full bit index also selects the word,
                   so add (bit_index / width) * width_bytes to A0 */
6673            gen_exts(ot, cpu_T[1]);
6674            tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
6675            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6676            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
6677            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
6678        } else {
6679            gen_op_mov_v_reg(ot, cpu_T[0], rm);
6680        }
6681    bt_op:
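        /* reduce the bit index modulo the operand width, then shift the
           tested bit down to bit 0 in cpu_tmp4; it becomes CF in the
           flags update at the end */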
6682        tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
6683        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
6684        switch(op) {
6685        case 0:
6686            break;
6687        case 1:
6688            tcg_gen_movi_tl(cpu_tmp0, 1);
6689            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
6690            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
6691            break;
6692        case 2:
6693            tcg_gen_movi_tl(cpu_tmp0, 1);
6694            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
6695            tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
6696            break;
6697        default:
6698        case 3:
6699            tcg_gen_movi_tl(cpu_tmp0, 1);
6700            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
6701            tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
6702            break;
6703        }
6704        if (op != 0) {
6705            if (mod != 3) {
6706                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
6707            } else {
6708                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
6709            }
6710        }
6711
6712        /* Delay all CC updates until after the store above.  Note that
6713           C is the result of the test, Z is unchanged, and the others
6714           are all undefined.  */
6715        switch (s->cc_op) {
6716        case CC_OP_MULB ... CC_OP_MULQ:
6717        case CC_OP_ADDB ... CC_OP_ADDQ:
6718        case CC_OP_ADCB ... CC_OP_ADCQ:
6719        case CC_OP_SUBB ... CC_OP_SUBQ:
6720        case CC_OP_SBBB ... CC_OP_SBBQ:
6721        case CC_OP_LOGICB ... CC_OP_LOGICQ:
6722        case CC_OP_INCB ... CC_OP_INCQ:
6723        case CC_OP_DECB ... CC_OP_DECQ:
6724        case CC_OP_SHLB ... CC_OP_SHLQ:
6725        case CC_OP_SARB ... CC_OP_SARQ:
6726        case CC_OP_BMILGB ... CC_OP_BMILGQ:
6727            /* Z was going to be computed from the non-zero status of CC_DST.
6728               We can get that same Z value (and the new C value) by leaving
6729               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6730               same width.  */
6731            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6732            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
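                /* with CC_OP_SAR*, CF is read from bit 0 of CC_SRC, which
                   now holds the shifted-down tested bit */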
6733            break;
6734        default:
6735            /* Otherwise, generate EFLAGS and replace the C bit.  */
6736            gen_compute_eflags(s);
6737            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
6738                               ctz32(CC_C), 1);
6739            break;
6740        }
6741        break;
6742    case 0x1bc: /* bsf / tzcnt */
6743    case 0x1bd: /* bsr / lzcnt */
6744        ot = dflag;
6745        modrm = cpu_ldub_code(env, s->pc++);
6746        reg = ((modrm >> 3) & 7) | rex_r;
6747        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6748        gen_extu(ot, cpu_T[0]);
6749
6750        /* Note that lzcnt and tzcnt are in different extensions.  */
6751        if ((prefixes & PREFIX_REPZ)
6752            && (b & 1
6753                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6754                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6755            int size = 8 << ot;
6756            tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
6757            if (b & 1) {
6758                /* For lzcnt, reduce the target_ulong result by the
6759                   number of zeros that we expect to find at the top.  */
6760                gen_helper_clz(cpu_T[0], cpu_T[0]);
6761                tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
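                    /* e.g. a 16-bit lzcnt of 0x0008: clz of the
                       zero-extended value overcounts by
                       TARGET_LONG_BITS - 16, and the subtraction yields
                       the expected 12 */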
6762            } else {
6763                /* For tzcnt, a zero input must return the operand size:
6764                   force all bits outside the operand size to 1.  */
6765                target_ulong mask = (target_ulong)-2 << (size - 1);
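                    /* e.g. for MO_16 the mask sets bits 16 and up, so a
                       zero input yields ctz == 16, the operand size */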
6766                tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
6767                gen_helper_ctz(cpu_T[0], cpu_T[0]);
6768            }
6769            /* For lzcnt/tzcnt, C and Z bits are defined and are
6770               related to the result.  */
6771            gen_op_update1_cc();
6772            set_cc_op(s, CC_OP_BMILGB + ot);
6773        } else {
6774            /* For bsr/bsf, only the Z bit is defined and it is related
6775               to the input and not the result.  */
6776            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
6777            set_cc_op(s, CC_OP_LOGICB + ot);
6778            if (b & 1) {
6779                /* For bsr, return the bit index of the most significant
6780                   set bit, not the count of leading zeros.  */
6781                gen_helper_clz(cpu_T[0], cpu_T[0]);
6782                tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
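                    /* clz returns TARGET_LONG_BITS - 1 - index, so the XOR
                       with TARGET_LONG_BITS - 1 recovers the index of the
                       most significant set bit */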
6783            } else {
6784                gen_helper_ctz(cpu_T[0], cpu_T[0]);
6785            }
6786            /* ??? The manual says that the output is undefined when the
6787               input is zero, but real hardware leaves it unchanged, and
6788               real programs appear to depend on that.  */
6789            tcg_gen_movi_tl(cpu_tmp0, 0);
6790            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
6791                               cpu_regs[reg], cpu_T[0]);
6792        }
6793        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
6794        break;
6795        /************************/
6796        /* bcd */
6797    case 0x27: /* daa */
6798        if (CODE64(s))
6799            goto illegal_op;
6800        gen_update_cc_op(s);
6801        gen_helper_daa(cpu_env);
6802        set_cc_op(s, CC_OP_EFLAGS);
6803        break;
6804    case 0x2f: /* das */
6805        if (CODE64(s))
6806            goto illegal_op;
6807        gen_update_cc_op(s);
6808        gen_helper_das(cpu_env);
6809        set_cc_op(s, CC_OP_EFLAGS);
6810        break;
6811    case 0x37: /* aaa */
6812        if (CODE64(s))
6813            goto illegal_op;
6814        gen_update_cc_op(s);
6815        gen_helper_aaa(cpu_env);
6816        set_cc_op(s, CC_OP_EFLAGS);
6817        break;
6818    case 0x3f: /* aas */
6819        if (CODE64(s))
6820            goto illegal_op;
6821        gen_update_cc_op(s);
6822        gen_helper_aas(cpu_env);
6823        set_cc_op(s, CC_OP_EFLAGS);
6824        break;
6825    case 0xd4: /* aam */
6826        if (CODE64(s))
6827            goto illegal_op;
6828        val = cpu_ldub_code(env, s->pc++);
6829        if (val == 0) {
6830            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6831        } else {
6832            gen_helper_aam(cpu_env, tcg_const_i32(val));
6833            set_cc_op(s, CC_OP_LOGICB);
6834        }
6835        break;
6836    case 0xd5: /* aad */
6837        if (CODE64(s))
6838            goto illegal_op;
6839        val = cpu_ldub_code(env, s->pc++);
6840        gen_helper_aad(cpu_env, tcg_const_i32(val));
6841        set_cc_op(s, CC_OP_LOGICB);
6842        break;
6843        /************************/
6844        /* misc */
6845    case 0x90: /* nop */
6846        /* XXX: add correct lock-prefix checking for all insns */
6847        if (prefixes & PREFIX_LOCK) {
6848            goto illegal_op;
6849        }
6850        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6851        if (REX_B(s)) {
6852            goto do_xchg_reg_eax;
6853        }
6854        if (prefixes & PREFIX_REPZ) {
6855            gen_update_cc_op(s);
6856            gen_jmp_im(pc_start - s->cs_base);
6857            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6858            s->is_jmp = DISAS_TB_JUMP;
6859        }
6860        break;
6861    case 0x9b: /* fwait */
6862        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6863            (HF_MP_MASK | HF_TS_MASK)) {
6864            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6865        } else {
6866            gen_helper_fwait(cpu_env);
6867        }
6868        break;
6869    case 0xcc: /* int3 */
6870        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6871        break;
6872    case 0xcd: /* int N */
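        /* in vm86 mode INT n faults with #GP unless IOPL == 3, which lets
           a vm86 monitor virtualize software interrupts */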
6873        val = cpu_ldub_code(env, s->pc++);
6874        if (s->vm86 && s->iopl != 3) {
6875            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6876        } else {
6877            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6878        }
6879        break;
6880    case 0xce: /* into */
6881        if (CODE64(s))
6882            goto illegal_op;
6883        gen_update_cc_op(s);
6884        gen_jmp_im(pc_start - s->cs_base);
6885        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6886        break;
6887#ifdef WANT_ICEBP
6888    case 0xf1: /* icebp (undocumented, exits to external debugger) */
6889        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6890#if 1
6891        gen_debug(s, pc_start - s->cs_base);
6892#else
6893        /* start debug */
6894        tb_flush(CPU(x86_env_get_cpu(env)));
6895        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6896#endif
6897        break;
6898#endif
6899    case 0xfa: /* cli */
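        /* cli is permitted when CPL <= IOPL, or when IOPL == 3 in vm86
           mode; otherwise it raises #GP */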
6900        if (!s->vm86) {
6901            if (s->cpl <= s->iopl) {
6902                gen_helper_cli(cpu_env);
6903            } else {
6904                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6905            }
6906        } else {
6907            if (s->iopl == 3) {
6908                gen_helper_cli(cpu_env);
6909            } else {
6910                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6911            }
6912        }
6913        break;
6914    case 0xfb: /* sti */
6915        if (!s->vm86) {
6916            if (s->cpl <= s->iopl) {
6917            gen_sti:
6918                gen_helper_sti(cpu_env);
6919                /* interrupts are enabled only after the insn following sti */
6920                /* if several sti insns occur in a row, only the _first_
6921                   one arms the interrupt shadow */
6922                if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
6923                    gen_helper_set_inhibit_irq(cpu_env);
6924                /* give a chance to handle pending irqs */
6925                gen_jmp_im(s->pc - s->cs_base);
6926                gen_eob(s);
6927            } else {
6928                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6929            }
6930        } else {
6931            if (s->iopl == 3) {
6932                goto gen_sti;
6933            } else {
6934                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6935            }
6936        }
6937        break;
6938    case 0x62: /* bound */
6939        if (CODE64(s))
6940            goto illegal_op;
6941        ot = dflag;
6942        modrm = cpu_ldub_code(env, s->pc++);
6943        reg = (modrm >> 3) & 7;
6944        mod = (modrm >> 6) & 3;
6945        if (mod == 3)
6946            goto illegal_op;
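        /* the memory operand holds the lower and upper bounds; the helper
           raises #BR if the index lies outside them */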
6947        gen_op_mov_v_reg(ot, cpu_T[0], reg);
6948        gen_lea_modrm(env, s, modrm);
6949        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
6950        if (ot == MO_16) {
6951            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
6952        } else {
6953            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
6954        }
6955        break;
6956    case 0x1c8 ... 0x1cf: /* bswap reg */
6957        reg = (b & 7) | REX_B(s);
6958#ifdef TARGET_X86_64
6959        if (dflag == MO_64) {
6960            gen_op_mov_v_reg(MO_64, cpu_T[0], reg);
6961            tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
6962            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
6963        } else
6964#endif
6965        {
6966            gen_op_mov_v_reg(MO_32, cpu_T[0], reg);
6967            tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
6968            tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
6969            gen_op_mov_reg_v(MO_32, reg, cpu_T[0]);
6970        }
6971        break;
6972    case 0xd6: /* salc */
6973        if (CODE64(s))
6974            goto illegal_op;
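        /* AL = CF ? 0xff : 0x00: the carry is materialized as 0/1 and
           negated to 0/-1, whose low byte is written to AL */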
6975        gen_compute_eflags_c(s, cpu_T[0]);
6976        tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
6977        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
6978        break;
6979    case 0xe0: /* loopnz */
6980    case 0xe1: /* loopz */
6981    case 0xe2: /* loop */
6982    case 0xe3: /* jecxz */
6983        {
6984            TCGLabel *l1, *l2, *l3;
6985
6986            tval = (int8_t)insn_get(env, s, MO_8);
6987            next_eip = s->pc - s->cs_base;
6988            tval += next_eip;
6989            if (dflag == MO_16) {
6990                tval &= 0xffff;
6991            }
6992
6993            l1 = gen_new_label();
6994            l2 = gen_new_label();
6995            l3 = gen_new_label();
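            /* l1 is the branch-taken target, l3 falls through to the next
               insn (ECX reached zero, or the ZF test failed), and l2 is
               the common exit taken once EIP has been set */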
6996            b &= 3;
6997            switch(b) {
6998            case 0: /* loopnz */
6999            case 1: /* loopz */
7000                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7001                gen_op_jz_ecx(s->aflag, l3);
7002                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7003                break;
7004            case 2: /* loop */
7005                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7006                gen_op_jnz_ecx(s->aflag, l1);
7007                break;
7008            default:
7009            case 3: /* jcxz/jecxz */
7010                gen_op_jz_ecx(s->aflag, l1);
7011                break;
7012            }
7013
7014            gen_set_label(l3);
7015            gen_jmp_im(next_eip);
7016            tcg_gen_br(l2);
7017
7018            gen_set_label(l1);
7019            gen_jmp_im(tval);
7020            gen_set_label(l2);
7021            gen_eob(s);
7022        }
7023        break;
7024    case 0x130: /* wrmsr */
7025    case 0x132: /* rdmsr */
7026        if (s->cpl != 0) {
7027            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7028        } else {
7029            gen_update_cc_op(s);
7030            gen_jmp_im(pc_start - s->cs_base);
7031            if (b & 2) {
7032                gen_helper_rdmsr(cpu_env);
7033            } else {
7034                gen_helper_wrmsr(cpu_env);
7035            }
7036        }
7037        break;
7038    case 0x131: /* rdtsc */
7039        gen_update_cc_op(s);
7040        gen_jmp_im(pc_start - s->cs_base);
7041        if (s->tb->cflags & CF_USE_ICOUNT) {
7042            gen_io_start();
7043        }
7044        gen_helper_rdtsc(cpu_env);
7045        if (s->tb->cflags & CF_USE_ICOUNT) {
7046            gen_io_end();
7047            gen_jmp(s, s->pc - s->cs_base);
7048        }
7049        break;
7050    case 0x133: /* rdpmc */
7051        gen_update_cc_op(s);
7052        gen_jmp_im(pc_start - s->cs_base);
7053        gen_helper_rdpmc(cpu_env);
7054        break;
7055    case 0x134: /* sysenter */
7056        /* on Intel CPUs, SYSENTER remains valid in 64-bit mode */
7057        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7058            goto illegal_op;
7059        if (!s->pe) {
7060            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7061        } else {
7062            gen_helper_sysenter(cpu_env);
7063            gen_eob(s);
7064        }
7065        break;
7066    case 0x135: /* sysexit */
7067        /* on Intel CPUs, SYSEXIT remains valid in 64-bit mode */
7068        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7069            goto illegal_op;
7070        if (!s->pe) {
7071            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7072        } else {
7073            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7074            gen_eob(s);
7075        }
7076        break;
7077#ifdef TARGET_X86_64
7078    case 0x105: /* syscall */
7079        /* XXX: is it usable in real mode? */
7080        gen_update_cc_op(s);
7081        gen_jmp_im(pc_start - s->cs_base);
7082        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7083        gen_eob(s);
7084        break;
7085    case 0x107: /* sysret */
7086        if (!s->pe) {
7087            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7088        } else {
7089            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7090            /* condition codes are modified only in long mode */
7091            if (s->lma) {
7092                set_cc_op(s, CC_OP_EFLAGS);
7093            }
7094            gen_eob(s);
7095        }
7096        break;
7097#endif
7098    case 0x1a2: /* cpuid */
7099        gen_update_cc_op(s);
7100        gen_jmp_im(pc_start - s->cs_base);
7101        gen_helper_cpuid(cpu_env);
7102        break;
7103    case 0xf4: /* hlt */
7104        if (s->cpl != 0) {
7105            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7106        } else {
7107            gen_update_cc_op(s);
7108            gen_jmp_im(pc_start - s->cs_base);
7109            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7110            s->is_jmp = DISAS_TB_JUMP;
7111        }
7112        break;
7113    case 0x100:
7114        modrm = cpu_ldub_code(env, s->pc++);
7115        mod = (modrm >> 6) & 3;
7116        op = (modrm >> 3) & 7;
7117        switch(op) {
7118        case 0: /* sldt */
7119            if (!s->pe || s->vm86)
7120                goto illegal_op;
7121            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7122            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
7123            ot = mod == 3 ? dflag : MO_16;
7124            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7125            break;
7126        case 2: /* lldt */
7127            if (!s->pe || s->vm86)
7128                goto illegal_op;
7129            if (s->cpl != 0) {
7130                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7131            } else {
7132                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7133                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7134                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
7135                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7136            }
7137            break;
7138        case 1: /* str */
7139            if (!s->pe || s->vm86)
7140                goto illegal_op;
7141            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7142            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
7143            ot = mod == 3 ? dflag : MO_16;
7144            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7145            break;
7146        case 3: /* ltr */
7147            if (!s->pe || s->vm86)
7148                goto illegal_op;
7149            if (s->cpl != 0) {
7150                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7151            } else {
7152                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7153                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7154                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
7155                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7156            }
7157            break;
7158        case 4: /* verr */
7159        case 5: /* verw */
7160            if (!s->pe || s->vm86)
7161                goto illegal_op;
7162            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7163            gen_update_cc_op(s);
7164            if (op == 4) {
7165                gen_helper_verr(cpu_env, cpu_T[0]);
7166            } else {
7167                gen_helper_verw(cpu_env, cpu_T[0]);
7168            }
7169            set_cc_op(s, CC_OP_EFLAGS);
7170            break;
7171        default:
7172            goto illegal_op;
7173        }
7174        break;
7175    case 0x101:
7176        modrm = cpu_ldub_code(env, s->pc++);
7177        mod = (modrm >> 6) & 3;
7178        op = (modrm >> 3) & 7;
7179        rm = modrm & 7;
7180        switch(op) {
7181        case 0: /* sgdt */
7182            if (mod == 3)
7183                goto illegal_op;
7184            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7185            gen_lea_modrm(env, s, modrm);
7186            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
7187            gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
7188            gen_add_A0_im(s, 2);
7189            tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
7190            if (dflag == MO_16) {
7191                tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
7192            }
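            /* CODE64(s) + MO_32 stores eight base bytes in long mode and
               four otherwise; with a 16-bit operand the base was masked to
               24 bits above */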
7193            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
7194            break;
7195        case 1:
7196            if (mod == 3) {
7197                switch (rm) {
7198                case 0: /* monitor */
7199                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
7200                        s->cpl != 0)
7201                        goto illegal_op;
7202                    gen_update_cc_op(s);
7203                    gen_jmp_im(pc_start - s->cs_base);
7204                    tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7205                    gen_extu(s->aflag, cpu_A0);
7206                    gen_add_A0_ds_seg(s);
7207                    gen_helper_monitor(cpu_env, cpu_A0);
7208                    break;
7209                case 1: /* mwait */
7210                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
7211                        s->cpl != 0)
7212                        goto illegal_op;
7213                    gen_update_cc_op(s);
7214                    gen_jmp_im(pc_start - s->cs_base);
7215                    gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7216                    gen_eob(s);
7217                    break;
7218                case 2: /* clac */
7219                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
7220                        s->cpl != 0) {
7221                        goto illegal_op;
7222                    }
7223                    gen_helper_clac(cpu_env);
7224                    gen_jmp_im(s->pc - s->cs_base);
7225                    gen_eob(s);
7226                    break;
7227                case 3: /* stac */
7228                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
7229                        s->cpl != 0) {
7230                        goto illegal_op;
7231                    }
7232                    gen_helper_stac(cpu_env);
7233                    gen_jmp_im(s->pc - s->cs_base);
7234                    gen_eob(s);
7235                    break;
7236                default:
7237                    goto illegal_op;
7238                }
7239            } else { /* sidt */
7240                gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7241                gen_lea_modrm(env, s, modrm);
7242                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
7243                gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
7244                gen_add_A0_im(s, 2);
7245                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
7246                if (dflag == MO_16) {
7247                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
7248                }
7249                gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
7250            }
7251            break;
7252        case 2: /* lgdt */
7253        case 3: /* lidt */
7254            if (mod == 3) {
7255                gen_update_cc_op(s);
7256                gen_jmp_im(pc_start - s->cs_base);
7257                switch(rm) {
7258                case 0: /* VMRUN */
7259                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7260                        goto illegal_op;
7261                    if (s->cpl != 0) {
7262                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7263                        break;
7264                    } else {
7265                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7266                                         tcg_const_i32(s->pc - pc_start));
7267                        tcg_gen_exit_tb(0);
7268                        s->is_jmp = DISAS_TB_JUMP;
7269                    }
7270                    break;
7271                case 1: /* VMMCALL */
7272                    if (!(s->flags & HF_SVME_MASK))
7273                        goto illegal_op;
7274                    gen_helper_vmmcall(cpu_env);
7275                    break;
7276                case 2: /* VMLOAD */
7277                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7278                        goto illegal_op;
7279                    if (s->cpl != 0) {
7280                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7281                        break;
7282                    } else {
7283                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7284                    }
7285                    break;
7286                case 3: /* VMSAVE */
7287                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7288                        goto illegal_op;
7289                    if (s->cpl != 0) {
7290                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7291                        break;
7292                    } else {
7293                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7294                    }
7295                    break;
7296                case 4: /* STGI */
7297                    if ((!(s->flags & HF_SVME_MASK) &&
7298                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
7299                        !s->pe)
7300                        goto illegal_op;
7301                    if (s->cpl != 0) {
7302                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7303                        break;
7304                    } else {
7305                        gen_helper_stgi(cpu_env);
7306                    }
7307                    break;
7308                case 5: /* CLGI */
7309                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7310                        goto illegal_op;
7311                    if (s->cpl != 0) {
7312                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7313                        break;
7314                    } else {
7315                        gen_helper_clgi(cpu_env);
7316                    }
7317                    break;
7318                case 6: /* SKINIT */
7319                    if ((!(s->flags & HF_SVME_MASK) && 
7320                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
7321                        !s->pe)
7322                        goto illegal_op;
7323                    gen_helper_skinit(cpu_env);
7324                    break;
7325                case 7: /* INVLPGA */
7326                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7327                        goto illegal_op;
7328                    if (s->cpl != 0) {
7329                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7330                        break;
7331                    } else {
7332                        gen_helper_invlpga(cpu_env,
7333                                           tcg_const_i32(s->aflag - 1));
7334                    }
7335                    break;
7336                default:
7337                    goto illegal_op;
7338                }
7339            } else if (s->cpl != 0) {
7340                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7341            } else {
7342                gen_svm_check_intercept(s, pc_start,
7343                                        op == 2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
7344                gen_lea_modrm(env, s, modrm);
7345                gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
7346                gen_add_A0_im(s, 2);
7347                gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
7348                if (dflag == MO_16) {
7349                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
7350                }
7351                if (op == 2) {
7352                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
7353                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
7354                } else {
7355                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base));
7356                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit));
7357                }
7358            }
7359            break;
7360        case 4: /* smsw */
7361            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
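            /* cr[0] is a target_ulong, so on a big-endian 64-bit host its
               low 32 bits live at offset +4, hence the special case */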
7362#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
7363            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4);
7364#else
7365            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
7366#endif
7367            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
7368            break;
7369        case 6: /* lmsw */
7370            if (s->cpl != 0) {
7371                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7372            } else {
7373                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7374                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7375                gen_helper_lmsw(cpu_env, cpu_T[0]);
7376                gen_jmp_im(s->pc - s->cs_base);
7377                gen_eob(s);
7378            }
7379            break;
7380        case 7:
7381            if (mod != 3) { /* invlpg */
7382                if (s->cpl != 0) {
7383                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7384                } else {
7385                    gen_update_cc_op(s);
7386                    gen_jmp_im(pc_start - s->cs_base);
7387                    gen_lea_modrm(env, s, modrm);
7388                    gen_helper_invlpg(cpu_env, cpu_A0);
7389                    gen_jmp_im(s->pc - s->cs_base);
7390                    gen_eob(s);
7391                }
7392            } else {
7393                switch (rm) {
7394                case 0: /* swapgs */
7395#ifdef TARGET_X86_64
7396                    if (CODE64(s)) {
7397                        if (s->cpl != 0) {
7398                            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7399                        } else {
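                            /* exchange GS.base with the KernelGSbase MSR */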
                            tcg_gen_ld_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,segs[R_GS].base));
                            tcg_gen_ld_tl(cpu_T[1], cpu_env,
                                offsetof(CPUX86State,kernelgsbase));
                            tcg_gen_st_tl(cpu_T[1], cpu_env,
                                offsetof(CPUX86State,segs[R_GS].base));
                            tcg_gen_st_tl(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,kernelgsbase));
                        }
                    } else
#endif
                    {
                        goto illegal_op;
                    }
                    break;
                case 1: /* rdtscp */
                    if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
                        goto illegal_op;
                    gen_update_cc_op(s);
                    gen_jmp_im(pc_start - s->cs_base);
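                    /* RDTSCP reads the virtual time base, so in icount
                       mode it must be bracketed with gen_io_start()/
                       gen_io_end() to keep the instruction counter
                       deterministic. */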
                    if (s->tb->cflags & CF_USE_ICOUNT) {
                        gen_io_start();
                    }
                    gen_helper_rdtscp(cpu_env);
                    if (s->tb->cflags & CF_USE_ICOUNT) {
                        gen_io_end();
                        gen_jmp(s, s->pc - s->cs_base);
                    }
                    break;
                default:
                    goto illegal_op;
                }
            }
            break;
        default:
            goto illegal_op;
        }
        break;
    case 0x108: /* invd */
    case 0x109: /* wbinvd */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
            /* nothing to do */
        }
        break;
    case 0x63: /* arpl or movsxd (x86_64) */
#ifdef TARGET_X86_64
        if (CODE64(s)) {
            int d_ot;
            /* d_ot is the size of the destination */
            d_ot = dflag;

            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
                /* sign extend */
                if (d_ot == MO_64) {
                    tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                }
                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
            } else {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
            }
        } else
#endif
        {
            TCGLabel *label1;
            TCGv t0, t1, t2, a0;

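            /* ARPL: if the destination selector's RPL is below the
               source's, raise it to match and set ZF; otherwise clear
               ZF.  Local temps are needed because the brcond below
               ends the extended basic block for plain temporaries. */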
            if (!s->pe || s->vm86)
                goto illegal_op;
            t0 = tcg_temp_local_new();
            t1 = tcg_temp_local_new();
            t2 = tcg_temp_local_new();
            ot = MO_16;
            modrm = cpu_ldub_code(env, s->pc++);
            reg = (modrm >> 3) & 7;
            mod = (modrm >> 6) & 3;
            rm = modrm & 7;
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, ot, t0, cpu_A0);
                a0 = tcg_temp_local_new();
                tcg_gen_mov_tl(a0, cpu_A0);
            } else {
                gen_op_mov_v_reg(ot, t0, rm);
                TCGV_UNUSED(a0);
            }
            gen_op_mov_v_reg(ot, t1, reg);
            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
            tcg_gen_andi_tl(t1, t1, 3);
            tcg_gen_movi_tl(t2, 0);
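            /* t2 collects the new Z flag: it stays 0 when no
               adjustment is made and becomes CC_Z when the RPL is
               raised. */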
            label1 = gen_new_label();
            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
            tcg_gen_andi_tl(t0, t0, ~3);
            tcg_gen_or_tl(t0, t0, t1);
            tcg_gen_movi_tl(t2, CC_Z);
            gen_set_label(label1);
            if (mod != 3) {
                gen_op_st_v(s, ot, t0, a0);
                tcg_temp_free(a0);
            } else {
                gen_op_mov_reg_v(ot, rm, t0);
            }
            gen_compute_eflags(s);
            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
        }
        break;
    case 0x102: /* lar */
    case 0x103: /* lsl */
        {
            TCGLabel *label1;
            TCGv t0;
            if (!s->pe || s->vm86)
                goto illegal_op;
            ot = dflag != MO_16 ? MO_32 : MO_16;
            modrm = cpu_ldub_code(env, s->pc++);
            reg = ((modrm >> 3) & 7) | rex_r;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            t0 = tcg_temp_local_new();
            gen_update_cc_op(s);
            if (b == 0x102) {
                gen_helper_lar(t0, cpu_env, cpu_T[0]);
            } else {
                gen_helper_lsl(t0, cpu_env, cpu_T[0]);
            }
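            /* The helpers report success by setting Z in cc_src; the
               destination register is updated only when the selector
               was valid. */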
            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
            label1 = gen_new_label();
            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
            gen_op_mov_reg_v(ot, reg, t0);
            gen_set_label(label1);
            set_cc_op(s, CC_OP_EFLAGS);
            tcg_temp_free(t0);
        }
        break;
    case 0x118:
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch (op) {
        case 0: /* prefetchnta */
        case 1: /* prefetcht0 */
        case 2: /* prefetcht1 */
        case 3: /* prefetcht2 */
            if (mod == 3)
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            /* nothing more to do */
            break;
        default: /* nop (multi byte) */
            gen_nop_modrm(env, s, modrm);
            break;
        }
        break;
    case 0x119 ... 0x11f: /* nop (multi byte) */
        modrm = cpu_ldub_code(env, s->pc++);
        gen_nop_modrm(env, s, modrm);
        break;
    case 0x120: /* mov reg, crN */
    case 0x122: /* mov crN, reg */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            modrm = cpu_ldub_code(env, s->pc++);
            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
             * AMD documentation (24594.pdf) and testing of
             * Intel 386 and 486 processors all show that the mod bits
             * are assumed to be 1's, regardless of actual values.
             */
            rm = (modrm & 7) | REX_B(s);
            reg = ((modrm >> 3) & 7) | rex_r;
            if (CODE64(s))
                ot = MO_64;
            else
                ot = MO_32;
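            /* With a LOCK prefix, MOV to/from CR0 is AMD's legacy
               encoding for CR8 (the TPR) when CPUID advertises
               CR8_LEGACY. */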
            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
                reg = 8;
            }
            switch (reg) {
            case 0:
            case 2:
            case 3:
            case 4:
            case 8:
                gen_update_cc_op(s);
                gen_jmp_im(pc_start - s->cs_base);
                if (b & 2) {
                    gen_op_mov_v_reg(ot, cpu_T[0], rm);
                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
                                         cpu_T[0]);
                    gen_jmp_im(s->pc - s->cs_base);
                    gen_eob(s);
                } else {
                    gen_helper_read_crN(cpu_T[0], cpu_env, tcg_const_i32(reg));
                    gen_op_mov_reg_v(ot, rm, cpu_T[0]);
                }
                break;
            default:
                goto illegal_op;
            }
        }
        break;
    case 0x121: /* mov reg, drN */
    case 0x123: /* mov drN, reg */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            modrm = cpu_ldub_code(env, s->pc++);
            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
             * AMD documentation (24594.pdf) and testing of
             * Intel 386 and 486 processors all show that the mod bits
             * are assumed to be 1's, regardless of actual values.
             */
            rm = (modrm & 7) | REX_B(s);
            reg = ((modrm >> 3) & 7) | rex_r;
            if (CODE64(s))
                ot = MO_64;
            else
                ot = MO_32;
            if (reg >= 8) {
                goto illegal_op;
            }
            if (b & 2) {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                gen_op_mov_v_reg(ot, cpu_T[0], rm);
                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
                gen_helper_set_dr(cpu_env, cpu_tmp2_i32, cpu_T[0]);
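                /* Writing a debug register may add or remove
                   breakpoints, so end the TB and return to the main
                   loop. */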
                gen_jmp_im(s->pc - s->cs_base);
                gen_eob(s);
            } else {
                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
                tcg_gen_movi_i32(cpu_tmp2_i32, reg);
                gen_helper_get_dr(cpu_T[0], cpu_env, cpu_tmp2_i32);
                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
            }
        }
        break;
    case 0x106: /* clts */
        if (s->cpl != 0) {
            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
        } else {
            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
            gen_helper_clts(cpu_env);
            /* abort block because static cpu state changed */
            gen_jmp_im(s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
    case 0x1c3: /* MOVNTI reg, mem */
        if (!(s->cpuid_features & CPUID_SSE2))
            goto illegal_op;
        ot = mo_64_32(dflag);
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        reg = ((modrm >> 3) & 7) | rex_r;
        /* generate a generic store */
        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
        break;
    case 0x1ae:
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch (op) {
        case 0: /* fxsave */
            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
                (s->prefix & PREFIX_LOCK))
                goto illegal_op;
            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                break;
            }
            gen_lea_modrm(env, s, modrm);
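            /* The i32 flag selects between the legacy 32-bit layout
               and the REX.W (FXSAVE64) one. */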
            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
            break;
        case 1: /* fxrstor */
            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
                (s->prefix & PREFIX_LOCK))
                goto illegal_op;
            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                break;
            }
            gen_lea_modrm(env, s, modrm);
            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
            break;
        case 2: /* ldmxcsr */
        case 3: /* stmxcsr */
            if (s->flags & HF_TS_MASK) {
                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                break;
            }
            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                mod == 3)
                goto illegal_op;
            gen_lea_modrm(env, s, modrm);
            if (op == 2) {
                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                    s->mem_index, MO_LEUL);
                gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
            } else {
                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
            }
            break;
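        /* The fences need no barrier op here: TCG executes guest
           memory accesses in order, so apart from the encoding and
           CPUID checks they are no-ops. */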
        case 5: /* lfence */
            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
                goto illegal_op;
            break;
        case 6: /* mfence / clwb */
            if (s->prefix & PREFIX_DATA) {
                /* clwb */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB))
                    goto illegal_op;
                gen_nop_modrm(env, s, modrm);
            } else {
                /* mfence */
                if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
                    goto illegal_op;
            }
            break;
        case 7: /* sfence / clflush / clflushopt / pcommit */
            if ((modrm & 0xc7) == 0xc0) {
                if (s->prefix & PREFIX_DATA) {
                    /* pcommit */
                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT))
                        goto illegal_op;
                } else {
                    /* sfence */
                    /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
                    if (!(s->cpuid_features & CPUID_SSE))
                        goto illegal_op;
                }
            } else {
                if (s->prefix & PREFIX_DATA) {
                    /* clflushopt */
                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT))
                        goto illegal_op;
                } else {
                    /* clflush */
                    if (!(s->cpuid_features & CPUID_CLFLUSH))
                        goto illegal_op;
                }
                gen_lea_modrm(env, s, modrm);
            }
            break;
        default:
            goto illegal_op;
        }
        break;
    case 0x10d: /* 3DNow! prefetch(w) */
        modrm = cpu_ldub_code(env, s->pc++);
        mod = (modrm >> 6) & 3;
        if (mod == 3)
            goto illegal_op;
        gen_lea_modrm(env, s, modrm);
        /* ignore for now */
        break;
    case 0x1aa: /* rsm */
        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
        if (!(s->flags & HF_SMM_MASK))
            goto illegal_op;
        gen_update_cc_op(s);
        gen_jmp_im(s->pc - s->cs_base);
        gen_helper_rsm(cpu_env);
        gen_eob(s);
        break;
    case 0x1b8: /* SSE4.2 popcnt */
        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
             PREFIX_REPZ)
            goto illegal_op;
        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
            goto illegal_op;

        modrm = cpu_ldub_code(env, s->pc++);
        reg = ((modrm >> 3) & 7) | rex_r;

        if (s->prefix & PREFIX_DATA) {
            ot = MO_16;
        } else {
            ot = mo_64_32(dflag);
        }

        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
        gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
        gen_op_mov_reg_v(ot, reg, cpu_T[0]);

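        /* POPCNT sets ZF when the source is zero and clears the other
           arithmetic flags; the flag computation itself happens in the
           helper, so only CC_OP_EFLAGS needs to be recorded here. */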
        set_cc_op(s, CC_OP_EFLAGS);
        break;
    case 0x10e ... 0x10f:
        /* 3DNow! instructions, ignore prefixes */
        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
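        /* fall through */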
    case 0x110 ... 0x117:
    case 0x128 ... 0x12f:
    case 0x138 ... 0x13a:
    case 0x150 ... 0x179:
    case 0x17c ... 0x17f:
    case 0x1c2:
    case 0x1c4 ... 0x1c6:
    case 0x1d0 ... 0x1fe:
        gen_sse(env, s, b, pc_start, rex_r);
        break;
    default:
        goto illegal_op;
    }
    /* lock generation */
    if (s->prefix & PREFIX_LOCK)
        gen_helper_unlock();
    return s->pc;
 illegal_op:
    if (s->prefix & PREFIX_LOCK)
        gen_helper_unlock();
    /* XXX: ensure that no lock was generated */
    gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
    return s->pc;
}

void optimize_flags_init(void)
{
    static const char reg_names[CPU_NB_REGS][4] = {
#ifdef TARGET_X86_64
        [R_EAX] = "rax",
        [R_EBX] = "rbx",
        [R_ECX] = "rcx",
        [R_EDX] = "rdx",
        [R_ESI] = "rsi",
        [R_EDI] = "rdi",
        [R_EBP] = "rbp",
        [R_ESP] = "rsp",
        [8]  = "r8",
        [9]  = "r9",
        [10] = "r10",
        [11] = "r11",
        [12] = "r12",
        [13] = "r13",
        [14] = "r14",
        [15] = "r15",
#else
        [R_EAX] = "eax",
        [R_EBX] = "ebx",
        [R_ECX] = "ecx",
        [R_EDX] = "edx",
        [R_ESI] = "esi",
        [R_EDI] = "edi",
        [R_EBP] = "ebp",
        [R_ESP] = "esp",
#endif
    };
    int i;

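    /* Expose the CPUX86State fields that generated code touches on
       nearly every instruction as named TCG globals, so that TCG can
       track and optimize accesses to them. */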
    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
    cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
                                       offsetof(CPUX86State, cc_op), "cc_op");
    cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
                                    "cc_dst");
    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
                                    "cc_src");
    cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                     "cc_src2");

    for (i = 0; i < CPU_NB_REGS; ++i) {
        cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                         offsetof(CPUX86State, regs[i]),
                                         reg_names[i]);
    }

    helper_lock_init();
}

/* Generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
{
    X86CPU *cpu = x86_env_get_cpu(env);
    CPUState *cs = CPU(cpu);
    DisasContext dc1, *dc = &dc1;
    target_ulong pc_ptr;
    uint64_t flags;
    target_ulong pc_start;
    target_ulong cs_base;
    int num_insns;
    int max_insns;

    /* generate intermediate code */
    pc_start = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;

    dc->pe = (flags >> HF_PE_SHIFT) & 1;
    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
    dc->f_st = 0;
    dc->vm86 = (flags >> VM_SHIFT) & 1;
    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
    dc->iopl = (flags >> IOPL_SHIFT) & 3;
    dc->tf = (flags >> TF_SHIFT) & 1;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->cc_op = CC_OP_DYNAMIC;
    dc->cc_op_dirty = false;
    dc->cs_base = cs_base;
    dc->tb = tb;
    dc->popl_esp_hack = 0;
    /* select memory access functions */
    dc->mem_index = 0;
    if (flags & HF_SOFTMMU_MASK) {
        dc->mem_index = cpu_mmu_index(env, false);
    }
    dc->cpuid_features = env->features[FEAT_1_EDX];
    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
#ifdef TARGET_X86_64
    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
#endif
    dc->flags = flags;
    dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
                    (flags & HF_INHIBIT_IRQ_MASK)
#ifndef CONFIG_SOFTMMU
                    || (flags & HF_SOFTMMU_MASK)
#endif
                    );
    /* Do not optimize repz jumps at all in icount mode: rep movsS
       instructions are executed along different paths in the
       !repz_opt and repz_opt cases, and the former was used
       everywhere except in single-step mode.  Forcing repz_opt off
       here disables the jump optimization so that the control flow
       is identical in normal and single-step execution.  As a
       result there is no repz jump optimization in record/replay
       mode, and an extra step is taken for ecx=0 whenever icount
       is enabled. */
    dc->repz_opt = !dc->jmp_opt && !(tb->cflags & CF_USE_ICOUNT);
#if 0
    /* check addseg logic */
    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
        printf("ERROR addseg\n");
#endif

    cpu_T[0] = tcg_temp_new();
    cpu_T[1] = tcg_temp_new();
    cpu_A0 = tcg_temp_new();

    cpu_tmp0 = tcg_temp_new();
    cpu_tmp1_i64 = tcg_temp_new_i64();
    cpu_tmp2_i32 = tcg_temp_new_i32();
    cpu_tmp3_i32 = tcg_temp_new_i32();
    cpu_tmp4 = tcg_temp_new();
    cpu_ptr0 = tcg_temp_new_ptr();
    cpu_ptr1 = tcg_temp_new_ptr();
    cpu_cc_srcT = tcg_temp_local_new();

    dc->is_jmp = DISAS_NEXT;
    pc_ptr = pc_start;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }
    if (max_insns > TCG_MAX_INSNS) {
        max_insns = TCG_MAX_INSNS;
    }

    gen_tb_start(tb);
    for (;;) {
        tcg_gen_insn_start(pc_ptr, dc->cc_op);
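        /* The arguments recorded here (pc, cc_op) are what
           restore_state_to_opc() gets back when guest state has to be
           reconstructed at a fault. */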
        num_insns++;

        /* If RF is set, suppress an internally generated breakpoint.  */
        if (unlikely(cpu_breakpoint_test(cs, pc_ptr,
                                         tb->flags & HF_RF_MASK
                                         ? BP_GDB : BP_ANY))) {
            gen_debug(dc, pc_ptr - dc->cs_base);
            /* The address covered by the breakpoint must be included in
               [tb->pc, tb->pc + tb->size) in order for it to be
               properly cleared -- thus we increment the PC here so that
               the logic setting tb->size below does the right thing.  */
            pc_ptr += 1;
            goto done_generating;
        }
        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }

        pc_ptr = disas_insn(env, dc, pc_ptr);
        /* stop translation if indicated */
        if (dc->is_jmp)
            break;
        /* In single-step mode we generate only one instruction and
           then raise an exception. */
        /* If irqs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
           the flag and abort the translation to give the irqs a
           chance to happen. */
        if (dc->tf || dc->singlestep_enabled ||
            (flags & HF_INHIBIT_IRQ_MASK)) {
            gen_jmp_im(pc_ptr - dc->cs_base);
            gen_eob(dc);
            break;
        }
        /* In icount mode, do not cross a page boundary: the fetch
           could raise an exception.  Stop only when the boundary
           would be crossed by the first instruction of the next
           block; if the current instruction already crossed it,
           that is fine, since no exception stopped this code. */
        if ((tb->cflags & CF_USE_ICOUNT)
            && ((pc_ptr & TARGET_PAGE_MASK)
                != ((pc_ptr + TARGET_MAX_INSN_SIZE - 1) & TARGET_PAGE_MASK)
                || (pc_ptr & ~TARGET_PAGE_MASK) == 0)) {
            gen_jmp_im(pc_ptr - dc->cs_base);
            gen_eob(dc);
            break;
        }
        /* if the translation becomes too long, stop generation too */
        if (tcg_op_buf_full() ||
            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
            num_insns >= max_insns) {
            gen_jmp_im(pc_ptr - dc->cs_base);
            gen_eob(dc);
            break;
        }
        if (singlestep) {
            gen_jmp_im(pc_ptr - dc->cs_base);
            gen_eob(dc);
            break;
        }
    }
    if (tb->cflags & CF_LAST_IO)
        gen_io_end();
done_generating:
    gen_tb_end(tb, num_insns);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        int disas_flags;
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
#ifdef TARGET_X86_64
        if (dc->code64)
            disas_flags = 2;
        else
#endif
            disas_flags = !dc->code32;
        log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags);
        qemu_log("\n");
    }
#endif

    tb->size = pc_ptr - pc_start;
    tb->icount = num_insns;
}

void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
                          target_ulong *data)
{
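    /* data[] holds the values recorded by tcg_gen_insn_start():
       data[0] is the pc (eip + cs_base), data[1] the cc_op. */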
    int cc_op = data[1];
    env->eip = data[0] - tb->cs_base;
    if (cc_op != CC_OP_DYNAMIC) {
        env->cc_op = cc_op;
    }
}