qemu/target-i386/translate.c
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include <signal.h>

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "tcg-op.h"

#include "helper.h"
#define GEN_HELPER 1
#include "helper.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

//#define MACRO_TEST   1

/* global register indexes */
static TCGv_ptr cpu_env;
static TCGv cpu_A0;
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
/* local temps */
static TCGv cpu_T[2];
/* local register indexes (only used inside old micro ops) */
static TCGv cpu_tmp0, cpu_tmp4;
static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;

static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];

#include "exec/gen-icount.h"

#ifdef TARGET_X86_64
static int x86_64_hregs;
#endif

typedef struct DisasContext {
    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    TCGMemOp aflag;
    TCGMemOp dflag;
    target_ulong pc; /* pc = eip + cs_base */
    int is_jmp; /* 1 means jump (stop translation), 2 means CPU
                   static state change (stop translation) */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
    int addseg; /* non zero if either DS/ES/SS have a non zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int singlestep_enabled; /* "hardware" single step enabled */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    struct TranslationBlock *tb;
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
};

static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(cpu_cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}
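
/* Illustrative note (not in the original source): flags are tracked
   lazily.  For example, after an ADD the translator records CC_OP_ADDB+ot
   with the operands in cc_dst/cc_src; if the next insn is a logic op
   (CC_OP_LOGIC* keeps only CC_DST live, per cc_op_live above), set_cc_op()
   emits a discard of cpu_cc_src so TCG can dead-code-eliminate whatever
   computed it. */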

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || x86_64_hregs) {
        return false;
    }
#endif
    return true;
}
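
/* Example (illustrative, not in the original source): without a REX
   prefix, reg 4..7 in a byte-op ModRM names AH/CH/DH/BH, so
   byte_reg_is_xH(4) is true.  With any REX prefix present
   (x86_64_hregs set), the same encodings name SPL/BPL/SIL/DIL instead
   and the function returns false. */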

/* Select the size of a push/pop operation.  */
static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}
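
/* Illustrative note (not in the original source): in 64-bit mode the
   stack width for push/pop is 64 bits and a 32-bit size is not
   encodable; only a 0x66 operand-size prefix (MO_16) survives, which is
   the special case preserved above. */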

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline TCGMemOp mo_64_32(TCGMemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline TCGMemOp mo_b_d(int b, TCGMemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(TCGMemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}
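
/* Illustrative examples (not in the original source):
     gen_op_mov_reg_v(MO_8, R_EAX, t)   deposits t into bits 7:0 (AL);
     gen_op_mov_reg_v(MO_8, 4, t)       deposits into bits 15:8 of EAX
                                        (AH) when byte_reg_is_xH(4) holds;
     gen_op_mov_reg_v(MO_32, R_EAX, t)  zero-extends, matching the
                                        hardware rule that 32-bit writes
                                        clear the upper half of a 64-bit
                                        register. */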

static inline void gen_op_mov_v_reg(TCGMemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(reg)) {
        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
        tcg_gen_ext8u_tl(t0, t0);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static inline void gen_op_movl_A0_reg(int reg)
{
    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
}

static inline void gen_op_addl_A0_im(int32_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
#ifdef TARGET_X86_64
    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
#endif
}

#ifdef TARGET_X86_64
static inline void gen_op_addq_A0_im(int64_t val)
{
    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
}
#endif

static void gen_add_A0_im(DisasContext *s, int val)
{
#ifdef TARGET_X86_64
    if (CODE64(s))
        gen_op_addq_A0_im(val);
    else
#endif
        gen_op_addl_A0_im(val);
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline void gen_op_add_reg_im(TCGMemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_add_reg_T0(TCGMemOp size, int reg)
{
    tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]);
    gen_op_mov_reg_v(size, reg, cpu_tmp0);
}

static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
    /* For x86_64, this sets the higher half of register to zero.
       For i386, this is equivalent to a nop. */
    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
}

static inline void gen_op_movl_A0_seg(int reg)
{
    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
}

static inline void gen_op_addl_A0_seg(DisasContext *s, int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
#ifdef TARGET_X86_64
    if (CODE64(s)) {
        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
    } else {
        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
    }
#else
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
#endif
}
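
/* Illustrative note (not in the original source): the two orderings above
   differ only in where the 32-bit truncation lands.  In 64-bit mode the
   offset is truncated before the segment base (e.g. FS/GS) is added, so a
   base above 4GB survives; in legacy mode the final linear address is
   truncated after the add, giving 32-bit wraparound. */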

#ifdef TARGET_X86_64
static inline void gen_op_movq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base));
}

static inline void gen_op_addq_A0_seg(int reg)
{
    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}

static inline void gen_op_movq_A0_reg(int reg)
{
    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
}

static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
    if (shift != 0)
        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
}
#endif

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_reg_v(idx, d, cpu_T[0]);
    }
}

static inline void gen_jmp_im(target_ulong pc)
{
    tcg_gen_movi_tl(cpu_tmp0, pc);
    gen_op_jmp_v(cpu_tmp0);
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    int override;

    override = s->override;
    switch (s->aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (override >= 0) {
            gen_op_movq_A0_seg(override);
            gen_op_addq_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movq_A0_reg(R_ESI);
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (s->addseg && override < 0)
            override = R_DS;
        if (override >= 0) {
            gen_op_movl_A0_seg(override);
            gen_op_addl_A0_reg_sN(0, R_ESI);
        } else {
            gen_op_movl_A0_reg(R_ESI);
        }
        break;
    case MO_16:
        /* 16-bit address, always override */
        if (override < 0)
            override = R_DS;
        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESI]);
        gen_op_addl_A0_seg(s, override);
        break;
    default:
        tcg_abort();
    }
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    switch (s->aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        gen_op_movq_A0_reg(R_EDI);
        break;
#endif
    case MO_32:
        if (s->addseg) {
            gen_op_movl_A0_seg(R_ES);
            gen_op_addl_A0_reg_sN(0, R_EDI);
        } else {
            gen_op_movl_A0_reg(R_EDI);
        }
        break;
    case MO_16:
        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_EDI]);
        gen_op_addl_A0_seg(s, R_ES);
        break;
    default:
        tcg_abort();
    }
}

static inline void gen_op_movl_T0_Dshift(TCGMemOp ot)
{
    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(TCGMemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline void gen_op_jnz_ecx(TCGMemOp size, int label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
}

static inline void gen_op_jz_ecx(TCGMemOp size, int label1)
{
    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
    gen_extu(size, cpu_tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
}

static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, n);
        break;
    case MO_16:
        gen_helper_inw(v, n);
        break;
    case MO_32:
        gen_helper_inl(v, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(v, n);
        break;
    case MO_16:
        gen_helper_outw(v, n);
        break;
    case MO_32:
        gen_helper_outl(v, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    int state_saved;
    target_ulong next_eip;

    state_saved = 0;
    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        gen_update_cc_op(s);
        gen_jmp_im(cur_eip);
        state_saved = 1;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_SVMI_MASK) {
        if (!state_saved) {
            gen_update_cc_op(s);
            gen_jmp_im(cur_eip);
        }
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}
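
/* Illustrative note (not in the original source): the protected-mode path
   lets the check_io helpers raise #GP when CPL > IOPL (or in vm86 mode)
   and the TSS I/O permission bitmap denies the port, so EIP and cc_op are
   synced first; the SVM path likewise reports the access to a guest
   hypervisor's IOIO intercept before the port is touched. */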

static inline void gen_movs(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static void gen_op_update1_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update2_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_op_update3_cc(TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static inline void gen_op_testl_T0_T1_cc(void)
{
    tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
}

static void gen_op_update_neg_cc(void)
{
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
    tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]);
    tcg_gen_movi_tl(cpu_cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    TCGV_UNUSED(zero);
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}
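
/* Illustrative note (not in the original source): after XOR reg,reg the
   state is CC_OP_CLR, so the fast path above materializes the constant
   flag word directly: ZF and PF set, CF/OF/SF clear, i.e. CC_Z | CC_P,
   without calling the cc_compute_all helper. */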

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;
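
/* Illustrative note (not in the original source): a CCPrepare describes a
   condition without forcing its evaluation.  E.g. "CF after SAR" becomes
   { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_C }; the consumer
   can then test the bit with a branch, turn it into 0/1 with setcond, or,
   when mask is a power of two, just shift-and-mask (see gen_setcc1). */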

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg;
        tcg_gen_mov_tl(t0, cpu_cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    default:
        {
            TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    TCGMemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_extu(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT);
            gen_exts(size, cpu_tmp4);
            t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (TCGV_EQUAL(reg, cpu_cc_src)) {
                reg = cpu_tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}
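
/* Illustrative note (not in the original source): the single-bit fast
   path above handles masks that are a power of two.  E.g. with
   CC_OP_EFLAGS, CF is prepared as { TCG_COND_NE, cpu_cc_src, .mask = CC_C };
   since CC_C is bit 0, SETC compiles to a shift by ctztl(mask) plus an
   AND with 1 instead of a full setcond against an immediate. */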

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, int l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
        cc.reg = cpu_T[0];
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, int l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
        cc.reg = cpu_T[0];
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    int l1, l2;

    l1 = gen_new_label();
    l2 = gen_new_label();
    gen_op_jnz_ecx(s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, TCGMemOp ot)
{
    gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, TCGMemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    gen_op_add_reg_T0(s->aflag, R_EDI);
}

static inline void gen_ins(DisasContext *s, TCGMemOp ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(cpu_T[0], 0);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    gen_helper_in_func(ot, cpu_T[0], cpu_tmp2_i32);
    gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_EDI);
    if (use_icount)
        gen_io_end();
}

static inline void gen_outs(DisasContext *s, TCGMemOp ot)
{
    if (use_icount)
        gen_io_start();
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);

    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[0]);
    gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);

    gen_op_movl_T0_Dshift(ot);
    gen_op_add_reg_T0(s->aflag, R_ESI);
    if (use_icount)
        gen_io_end();
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction. */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    int l2;                                                                   \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    int l2;                                                                   \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s->aflag, R_ECX, -1);                                   \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (!s->jmp_opt)                                                          \
        gen_op_jz_ecx(s->aflag, l2);                                          \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
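
/* Illustrative note (not in the original source): a REP prefix is not
   translated as a host-side loop.  Each iteration executes one string op,
   decrements ECX, and jumps back to cur_eip, re-entering the same TB; the
   ECX == 0 check at the top exits to next_eip when the count runs out
   (and, for REPZ/REPNZ, gen_jcc1 exits early on the Z condition).  This
   keeps every iteration individually interruptible and restartable. */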

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T[0], d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, cpu_tmp4);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update3_cc(cpu_tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
        tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update2_cc();
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        gen_op_st_rm_T0_A0(s1, ot, d);
        gen_op_update1_cc();
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
        tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]);
        tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
    if (d != OR_TMP0) {
        gen_op_mov_v_reg(ot, cpu_T[0], d);
    } else {
        gen_op_ld_v(s1, ot, cpu_T[0], cpu_A0);
    }
    gen_compute_eflags_c(s1, cpu_cc_src);
    if (c > 0) {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
        set_cc_op(s1, CC_OP_INCB + ot);
    } else {
        tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1);
        set_cc_op(s1, CC_OP_DECB + ot);
    }
    gen_op_st_rm_T0_A0(s1, ot, d);
    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}

static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
        oldop = cpu_tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
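
/* Illustrative note (not in the original source): x86 leaves all flags
   unchanged when the masked shift count is zero, but here the count is
   only known at run time.  The movcond ops above therefore select between
   the old and new cc_dst/cc_src/cc_op values based on count != 0, which
   is why cc_op ends up CC_OP_DYNAMIC from the translator's point of
   view. */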

static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T[0], op1);
    }

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
    tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, cpu_T[0]);
            tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            gen_extu(ot, cpu_T[0]);
            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
    } else {
        tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
}

static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0)
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    else
        gen_op_mov_v_reg(ot, cpu_T[0], op1);

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, cpu_T[0]);
                tcg_gen_sari_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_sari_tl(cpu_T[0], cpu_T[0], op2);
            } else {
                gen_extu(ot, cpu_T[0]);
                tcg_gen_shri_tl(cpu_tmp4, cpu_T[0], op2 - 1);
                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], op2);
            }
        } else {
            tcg_gen_shli_tl(cpu_tmp4, cpu_T[0], op2 - 1);
            tcg_gen_shli_tl(cpu_T[0], cpu_T[0], op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2)
{
    if (arg2 >= 0)
        tcg_gen_shli_tl(ret, arg1, arg2);
    else
        tcg_gen_shri_tl(ret, arg1, -arg2);
}

static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
    } else {
        gen_op_mov_v_reg(ot, cpu_T[0], op1);
    }

    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
        tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
        if (is_right) {
            tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        } else {
            tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
        }
        tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        } else {
            tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        cpu_tmp2_i32, cpu_tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
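
/* Illustrative note (not in the original source): for ROL/ROR with an
   8- or 16-bit operand the value is widened to 32 bits by replication
   (e.g. 0xAB * 0x01010101 == 0xABABABAB), so a single 32-bit rotate by
   any count 0..31 leaves the correct pattern in the low byte/word with
   no separate modulo-8 or modulo-16 count reduction. */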
1601
1602static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2,
1603                          int is_right)
1604{
1605    int mask = (ot == MO_64 ? 0x3f : 0x1f);
1606    int shift;
1607
1608    /* load */
1609    if (op1 == OR_TMP0) {
1610        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1611    } else {
1612        gen_op_mov_v_reg(ot, cpu_T[0], op1);
1613    }
1614
1615    op2 &= mask;
1616    if (op2 != 0) {
1617        switch (ot) {
1618#ifdef TARGET_X86_64
1619        case MO_32:
1620            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
1621            if (is_right) {
1622                tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1623            } else {
1624                tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
1625            }
1626            tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
1627            break;
1628#endif
1629        default:
1630            if (is_right) {
1631                tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
1632            } else {
1633                tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
1634            }
1635            break;
1636        case MO_8:
1637            mask = 7;
1638            goto do_shifts;
1639        case MO_16:
1640            mask = 15;
1641        do_shifts:
1642            shift = op2 & mask;
1643            if (is_right) {
1644                shift = mask + 1 - shift;
1645            }
1646            gen_extu(ot, cpu_T[0]);
1647            tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
1648            tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
1649            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
1650            break;
1651        }
1652    }
1653
1654    /* store */
1655    gen_op_st_rm_T0_A0(s, ot, op1);
1656
1657    if (op2 != 0) {
1658        /* Compute the flags into CC_SRC.  */
1659        gen_compute_eflags(s);
1660
1661        /* The value that was "rotated out" is now present at the other end
1662           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1663           since we've computed the flags into CC_SRC, these variables are
1664           currently dead.  */
1665        if (is_right) {
1666            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
1667            tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
1668            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1669        } else {
1670            tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
1671            tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
1672        }
1673        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1674        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1675        set_cc_op(s, CC_OP_ADCOX);
1676    }
1677}
1678
1679/* XXX: add faster immediate = 1 case */
1680static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1681                           int is_right)
1682{
1683    gen_compute_eflags(s);
1684    assert(s->cc_op == CC_OP_EFLAGS);
1685
1686    /* load */
1687    if (op1 == OR_TMP0)
1688        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1689    else
1690        gen_op_mov_v_reg(ot, cpu_T[0], op1);
1691    
1692    if (is_right) {
1693        switch (ot) {
1694        case MO_8:
1695            gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1696            break;
1697        case MO_16:
1698            gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1699            break;
1700        case MO_32:
1701            gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1702            break;
1703#ifdef TARGET_X86_64
1704        case MO_64:
1705            gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1706            break;
1707#endif
1708        default:
1709            tcg_abort();
1710        }
1711    } else {
1712        switch (ot) {
1713        case MO_8:
1714            gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1715            break;
1716        case MO_16:
1717            gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1718            break;
1719        case MO_32:
1720            gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1721            break;
1722#ifdef TARGET_X86_64
1723        case MO_64:
1724            gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
1725            break;
1726#endif
1727        default:
1728            tcg_abort();
1729        }
1730    }
1731    /* store */
1732    gen_op_st_rm_T0_A0(s, ot, op1);
1733}
1734
1735/* XXX: add faster immediate case */
1736static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1,
1737                             bool is_right, TCGv count_in)
1738{
1739    target_ulong mask = (ot == MO_64 ? 63 : 31);
1740    TCGv count;
1741
1742    /* load */
1743    if (op1 == OR_TMP0) {
1744        gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
1745    } else {
1746        gen_op_mov_v_reg(ot, cpu_T[0], op1);
1747    }
1748
1749    count = tcg_temp_new();
1750    tcg_gen_andi_tl(count, count_in, mask);
1751
1752    switch (ot) {
1753    case MO_16:
1754        /* Note: we implement the Intel behaviour for shift count > 16.
1755           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1756           portion by constructing it as a 32-bit value.  */
1757        if (is_right) {
1758            tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
1759            tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
1760            tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
1761        } else {
1762            tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
1763        }
1764        /* FALLTHRU */
1765#ifdef TARGET_X86_64
1766    case MO_32:
1767        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1768        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1769        if (is_right) {
1770            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
1771            tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
1772            tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
1773        } else {
1774            tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
1775            tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
1776            tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
1777            tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
1778            tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
1779        }
1780        break;
1781#endif
1782    default:
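            /* Generic case: also shift by count - 1 into cpu_tmp0 so that
               the last bit shifted out is still available for the CF
               computation in gen_shift_flags below.  */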
1783        tcg_gen_subi_tl(cpu_tmp0, count, 1);
1784        if (is_right) {
1785            tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1786
1787            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1788            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
1789            tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
1790        } else {
1791            tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
1792            if (ot == MO_16) {
1793                /* Only needed if count > 16, for Intel behaviour.  */
1794                tcg_gen_subfi_tl(cpu_tmp4, 33, count);
1795                tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
1796                tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
1797            }
1798
1799            tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
1800            tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
1801            tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
1802        }
1803        tcg_gen_movi_tl(cpu_tmp4, 0);
1804        tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
1805                           cpu_tmp4, cpu_T[1]);
1806        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
1807        break;
1808    }
1809
1810    /* store */
1811    gen_op_st_rm_T0_A0(s, ot, op1);
1812
1813    gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
1814    tcg_temp_free(count);
1815}
1816
1817static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s)
1818{
1819    if (s != OR_TMP1)
1820        gen_op_mov_v_reg(ot, cpu_T[1], s);
1821    switch(op) {
1822    case OP_ROL:
1823        gen_rot_rm_T1(s1, ot, d, 0);
1824        break;
1825    case OP_ROR:
1826        gen_rot_rm_T1(s1, ot, d, 1);
1827        break;
1828    case OP_SHL:
1829    case OP_SHL1:
1830        gen_shift_rm_T1(s1, ot, d, 0, 0);
1831        break;
1832    case OP_SHR:
1833        gen_shift_rm_T1(s1, ot, d, 1, 0);
1834        break;
1835    case OP_SAR:
1836        gen_shift_rm_T1(s1, ot, d, 1, 1);
1837        break;
1838    case OP_RCL:
1839        gen_rotc_rm_T1(s1, ot, d, 0);
1840        break;
1841    case OP_RCR:
1842        gen_rotc_rm_T1(s1, ot, d, 1);
1843        break;
1844    }
1845}
1846
1847static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
1848{
1849    switch(op) {
1850    case OP_ROL:
1851        gen_rot_rm_im(s1, ot, d, c, 0);
1852        break;
1853    case OP_ROR:
1854        gen_rot_rm_im(s1, ot, d, c, 1);
1855        break;
1856    case OP_SHL:
1857    case OP_SHL1:
1858        gen_shift_rm_im(s1, ot, d, c, 0, 0);
1859        break;
1860    case OP_SHR:
1861        gen_shift_rm_im(s1, ot, d, c, 1, 0);
1862        break;
1863    case OP_SAR:
1864        gen_shift_rm_im(s1, ot, d, c, 1, 1);
1865        break;
1866    default:
1867        /* currently not optimized */
1868        tcg_gen_movi_tl(cpu_T[1], c);
1869        gen_shift(s1, op, ot, d, OR_TMP1);
1870        break;
1871    }
1872}
1873
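    /* Decode a ModRM/SIB memory operand and leave the effective address in
       cpu_A0, handling 16/32/64-bit address sizes, displacement sizes,
       RIP-relative addressing and segment base addition.  */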
1874static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
1875{
1876    target_long disp;
1877    int havesib;
1878    int base;
1879    int index;
1880    int scale;
1881    int mod, rm, code, override, must_add_seg;
1882    TCGv sum;
1883
1884    override = s->override;
1885    must_add_seg = s->addseg;
1886    if (override >= 0)
1887        must_add_seg = 1;
1888    mod = (modrm >> 6) & 3;
1889    rm = modrm & 7;
1890
1891    switch (s->aflag) {
1892    case MO_64:
1893    case MO_32:
1894        havesib = 0;
1895        base = rm;
1896        index = -1;
1897        scale = 0;
1898
1899        if (base == 4) {
1900            havesib = 1;
1901            code = cpu_ldub_code(env, s->pc++);
1902            scale = (code >> 6) & 3;
1903            index = ((code >> 3) & 7) | REX_X(s);
1904            if (index == 4) {
1905                index = -1;  /* no index */
1906            }
1907            base = (code & 7);
1908        }
1909        base |= REX_B(s);
1910
1911        switch (mod) {
1912        case 0:
1913            if ((base & 7) == 5) {
1914                base = -1;
1915                disp = (int32_t)cpu_ldl_code(env, s->pc);
1916                s->pc += 4;
1917                if (CODE64(s) && !havesib) {
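                        /* RIP-relative: the displacement is relative to
                           the end of the instruction; rip_offset accounts
                           for immediate bytes that follow the ModRM.  */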
1918                    disp += s->pc + s->rip_offset;
1919                }
1920            } else {
1921                disp = 0;
1922            }
1923            break;
1924        case 1:
1925            disp = (int8_t)cpu_ldub_code(env, s->pc++);
1926            break;
1927        default:
1928        case 2:
1929            disp = (int32_t)cpu_ldl_code(env, s->pc);
1930            s->pc += 4;
1931            break;
1932        }
1933
1934        /* For correct popl handling with esp.  */
1935        if (base == R_ESP && s->popl_esp_hack) {
1936            disp += s->popl_esp_hack;
1937        }
1938
1939        /* Compute the address, with a minimum number of TCG ops.  */
1940        TCGV_UNUSED(sum);
1941        if (index >= 0) {
1942            if (scale == 0) {
1943                sum = cpu_regs[index];
1944            } else {
1945                tcg_gen_shli_tl(cpu_A0, cpu_regs[index], scale);
1946                sum = cpu_A0;
1947            }
1948            if (base >= 0) {
1949                tcg_gen_add_tl(cpu_A0, sum, cpu_regs[base]);
1950                sum = cpu_A0;
1951            }
1952        } else if (base >= 0) {
1953            sum = cpu_regs[base];
1954        }
1955        if (TCGV_IS_UNUSED(sum)) {
1956            tcg_gen_movi_tl(cpu_A0, disp);
1957        } else {
1958            tcg_gen_addi_tl(cpu_A0, sum, disp);
1959        }
1960
1961        if (must_add_seg) {
1962            if (override < 0) {
1963                if (base == R_EBP || base == R_ESP) {
1964                    override = R_SS;
1965                } else {
1966                    override = R_DS;
1967                }
1968            }
1969
1970            tcg_gen_ld_tl(cpu_tmp0, cpu_env,
1971                          offsetof(CPUX86State, segs[override].base));
1972            if (CODE64(s)) {
1973                if (s->aflag == MO_32) {
1974                    tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
1975                }
1976                tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
1977                return;
1978            }
1979
1980            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
1981        }
1982
1983        if (s->aflag == MO_32) {
1984            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
1985        }
1986        break;
1987
1988    case MO_16:
1989        switch (mod) {
1990        case 0:
1991            if (rm == 6) {
1992                disp = cpu_lduw_code(env, s->pc);
1993                s->pc += 2;
1994                tcg_gen_movi_tl(cpu_A0, disp);
1995                rm = 0; /* avoid SS override */
1996                goto no_rm;
1997            } else {
1998                disp = 0;
1999            }
2000            break;
2001        case 1:
2002            disp = (int8_t)cpu_ldub_code(env, s->pc++);
2003            break;
2004        default:
2005        case 2:
2006            disp = (int16_t)cpu_lduw_code(env, s->pc);
2007            s->pc += 2;
2008            break;
2009        }
2010
2011        sum = cpu_A0;
2012        switch (rm) {
2013        case 0:
2014            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_ESI]);
2015            break;
2016        case 1:
2017            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBX], cpu_regs[R_EDI]);
2018            break;
2019        case 2:
2020            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_ESI]);
2021            break;
2022        case 3:
2023            tcg_gen_add_tl(cpu_A0, cpu_regs[R_EBP], cpu_regs[R_EDI]);
2024            break;
2025        case 4:
2026            sum = cpu_regs[R_ESI];
2027            break;
2028        case 5:
2029            sum = cpu_regs[R_EDI];
2030            break;
2031        case 6:
2032            sum = cpu_regs[R_EBP];
2033            break;
2034        default:
2035        case 7:
2036            sum = cpu_regs[R_EBX];
2037            break;
2038        }
2039        tcg_gen_addi_tl(cpu_A0, sum, disp);
2040        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2041    no_rm:
2042        if (must_add_seg) {
2043            if (override < 0) {
2044                if (rm == 2 || rm == 3 || rm == 6) {
2045                    override = R_SS;
2046                } else {
2047                    override = R_DS;
2048                }
2049            }
2050            gen_op_addl_A0_seg(s, override);
2051        }
2052        break;
2053
2054    default:
2055        tcg_abort();
2056    }
2057}
2058
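    /* Decode a ModRM operand only to advance s->pc past it; no code is
       generated.  Used by instructions such as the multi-byte NOP that
       encode a memory operand without accessing it.  */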
2059static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2060{
2061    int mod, rm, base, code;
2062
2063    mod = (modrm >> 6) & 3;
2064    if (mod == 3)
2065        return;
2066    rm = modrm & 7;
2067
2068    switch (s->aflag) {
2069    case MO_64:
2070    case MO_32:
2071        base = rm;
2072
2073        if (base == 4) {
2074            code = cpu_ldub_code(env, s->pc++);
2075            base = (code & 7);
2076        }
2077
2078        switch (mod) {
2079        case 0:
2080            if (base == 5) {
2081                s->pc += 4;
2082            }
2083            break;
2084        case 1:
2085            s->pc++;
2086            break;
2087        default:
2088        case 2:
2089            s->pc += 4;
2090            break;
2091        }
2092        break;
2093
2094    case MO_16:
2095        switch (mod) {
2096        case 0:
2097            if (rm == 6) {
2098                s->pc += 2;
2099            }
2100            break;
2101        case 1:
2102            s->pc++;
2103            break;
2104        default:
2105        case 2:
2106            s->pc += 2;
2107            break;
2108        }
2109        break;
2110
2111    default:
2112        tcg_abort();
2113    }
2114}
2115
2116/* used for LEA and MOV AX, mem */
2117static void gen_add_A0_ds_seg(DisasContext *s)
2118{
2119    int override, must_add_seg;
2120    must_add_seg = s->addseg;
2121    override = R_DS;
2122    if (s->override >= 0) {
2123        override = s->override;
2124        must_add_seg = 1;
2125    }
2126    if (must_add_seg) {
2127#ifdef TARGET_X86_64
2128        if (CODE64(s)) {
2129            gen_op_addq_A0_seg(override);
2130        } else
2131#endif
2132        {
2133            gen_op_addl_A0_seg(s, override);
2134        }
2135    }
2136}
2137
2138/* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2139   OR_TMP0 */
2140static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2141                           TCGMemOp ot, int reg, int is_store)
2142{
2143    int mod, rm;
2144
2145    mod = (modrm >> 6) & 3;
2146    rm = (modrm & 7) | REX_B(s);
2147    if (mod == 3) {
2148        if (is_store) {
2149            if (reg != OR_TMP0)
2150                gen_op_mov_v_reg(ot, cpu_T[0], reg);
2151            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
2152        } else {
2153            gen_op_mov_v_reg(ot, cpu_T[0], rm);
2154            if (reg != OR_TMP0)
2155                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2156        }
2157    } else {
2158        gen_lea_modrm(env, s, modrm);
2159        if (is_store) {
2160            if (reg != OR_TMP0)
2161                gen_op_mov_v_reg(ot, cpu_T[0], reg);
2162            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2163        } else {
2164            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
2165            if (reg != OR_TMP0)
2166                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2167        }
2168    }
2169}
2170
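    /* Fetch an immediate operand of size ot from the instruction stream.
       MO_64 still fetches only 32 bits, since x86 immediates are at most
       32 bits wide apart from MOV reg, imm64, which is handled elsewhere.  */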
2171static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot)
2172{
2173    uint32_t ret;
2174
2175    switch (ot) {
2176    case MO_8:
2177        ret = cpu_ldub_code(env, s->pc);
2178        s->pc++;
2179        break;
2180    case MO_16:
2181        ret = cpu_lduw_code(env, s->pc);
2182        s->pc += 2;
2183        break;
2184    case MO_32:
2185#ifdef TARGET_X86_64
2186    case MO_64:
2187#endif
2188        ret = cpu_ldl_code(env, s->pc);
2189        s->pc += 4;
2190        break;
2191    default:
2192        tcg_abort();
2193    }
2194    return ret;
2195}
2196
2197static inline int insn_const_size(TCGMemOp ot)
2198{
2199    if (ot <= MO_32) {
2200        return 1 << ot;
2201    } else {
2202        return 4;
2203    }
2204}
2205
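    /* Emit a jump to eip, using a direct (chainable) TB link when the
       target lies on the same page as the current TB, so that the link is
       invalidated together with the page.  */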
2206static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2207{
2208    TranslationBlock *tb;
2209    target_ulong pc;
2210
2211    pc = s->cs_base + eip;
2212    tb = s->tb;
2213    /* NOTE: we handle the case where the TB spans two pages here */
2214    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
2215        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
2216        /* jump to same page: we can use a direct jump */
2217        tcg_gen_goto_tb(tb_num);
2218        gen_jmp_im(eip);
2219        tcg_gen_exit_tb((uintptr_t)tb + tb_num);
2220    } else {
2221        /* jump to another page: currently not optimized */
2222        gen_jmp_im(eip);
2223        gen_eob(s);
2224    }
2225}
2226
2227static inline void gen_jcc(DisasContext *s, int b,
2228                           target_ulong val, target_ulong next_eip)
2229{
2230    int l1, l2;
2231
2232    if (s->jmp_opt) {
2233        l1 = gen_new_label();
2234        gen_jcc1(s, b, l1);
2235
2236        gen_goto_tb(s, 0, next_eip);
2237
2238        gen_set_label(l1);
2239        gen_goto_tb(s, 1, val);
2240        s->is_jmp = DISAS_TB_JUMP;
2241    } else {
2242        l1 = gen_new_label();
2243        l2 = gen_new_label();
2244        gen_jcc1(s, b, l1);
2245
2246        gen_jmp_im(next_eip);
2247        tcg_gen_br(l2);
2248
2249        gen_set_label(l1);
2250        gen_jmp_im(val);
2251        gen_set_label(l2);
2252        gen_eob(s);
2253    }
2254}
2255
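    /* CMOVcc: the source operand is always loaded, as on real hardware;
       a movcond then selects between it and the previous register value,
       avoiding a branch inside the TB.  */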
2256static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
2257                        int modrm, int reg)
2258{
2259    CCPrepare cc;
2260
2261    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2262
2263    cc = gen_prepare_cc(s, b, cpu_T[1]);
2264    if (cc.mask != -1) {
2265        TCGv t0 = tcg_temp_new();
2266        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2267        cc.reg = t0;
2268    }
2269    if (!cc.use_reg2) {
2270        cc.reg2 = tcg_const_tl(cc.imm);
2271    }
2272
2273    tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2,
2274                       cpu_T[0], cpu_regs[reg]);
2275    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
2276
2277    if (cc.mask != -1) {
2278        tcg_temp_free(cc.reg);
2279    }
2280    if (!cc.use_reg2) {
2281        tcg_temp_free(cc.reg2);
2282    }
2283}
2284
2285static inline void gen_op_movl_T0_seg(int seg_reg)
2286{
2287    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
2288                     offsetof(CPUX86State,segs[seg_reg].selector));
2289}
2290
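    /* Load a segment register in real or vm86 mode: there is no descriptor
       to fetch, the base is simply selector << 4.  */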
2291static inline void gen_op_movl_seg_T0_vm(int seg_reg)
2292{
2293    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
2294    tcg_gen_st32_tl(cpu_T[0], cpu_env, 
2295                    offsetof(CPUX86State,segs[seg_reg].selector));
2296    tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
2297    tcg_gen_st_tl(cpu_T[0], cpu_env, 
2298                  offsetof(CPUX86State,segs[seg_reg].base));
2299}
2300
2301/* move T0 to seg_reg and determine whether the CPU state may change.
2302   Never call this function with seg_reg == R_CS */
2303static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
2304{
2305    if (s->pe && !s->vm86) {
2306        /* XXX: optimize by finding processor state dynamically */
2307        gen_update_cc_op(s);
2308        gen_jmp_im(cur_eip);
2309        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
2310        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
2311        /* abort translation because the addseg value may change or
2312           because ss32 may change. For R_SS, translation must always
2313           stop as special handling is needed to inhibit hardware
2314           interrupts for the next instruction */
2315        if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS))
2316            s->is_jmp = DISAS_TB_JUMP;
2317    } else {
2318        gen_op_movl_seg_T0_vm(seg_reg);
2319        if (seg_reg == R_SS)
2320            s->is_jmp = DISAS_TB_JUMP;
2321    }
2322}
2323
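    /* REP prefix flag as encoded in the SVM intercept exit information
       (bit 3).  */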
2324static inline int svm_is_rep(int prefixes)
2325{
2326    return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2327}
2328
2329static inline void
2330gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2331                              uint32_t type, uint64_t param)
2332{
2333    /* no SVM active: fast case */
2334    if (likely(!(s->flags & HF_SVMI_MASK)))
2335        return;
2336    gen_update_cc_op(s);
2337    gen_jmp_im(pc_start - s->cs_base);
2338    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2339                                         tcg_const_i64(param));
2340}
2341
2342static inline void
2343gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2344{
2345    gen_svm_check_intercept_param(s, pc_start, type, 0);
2346}
2347
2348static inline void gen_stack_update(DisasContext *s, int addend)
2349{
2350#ifdef TARGET_X86_64
2351    if (CODE64(s)) {
2352        gen_op_add_reg_im(MO_64, R_ESP, addend);
2353    } else
2354#endif
2355    if (s->ss32) {
2356        gen_op_add_reg_im(MO_32, R_ESP, addend);
2357    } else {
2358        gen_op_add_reg_im(MO_16, R_ESP, addend);
2359    }
2360}
2361
2362/* Generate a push. It depends on ss32, addseg and dflag.  */
2363static void gen_push_v(DisasContext *s, TCGv val)
2364{
2365    TCGMemOp a_ot, d_ot = mo_pushpop(s, s->dflag);
2366    int size = 1 << d_ot;
2367    TCGv new_esp = cpu_A0;
2368
2369    tcg_gen_subi_tl(cpu_A0, cpu_regs[R_ESP], size);
2370
2371    if (CODE64(s)) {
2372        a_ot = MO_64;
2373    } else if (s->ss32) {
2374        a_ot = MO_32;
2375        if (s->addseg) {
2376            new_esp = cpu_tmp4;
2377            tcg_gen_mov_tl(new_esp, cpu_A0);
2378            gen_op_addl_A0_seg(s, R_SS);
2379        } else {
2380            tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
2381        }
2382    } else {
2383        a_ot = MO_16;
2384        new_esp = cpu_tmp4;
2385        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2386        tcg_gen_mov_tl(new_esp, cpu_A0);
2387        gen_op_addl_A0_seg(s, R_SS);
2388    }
2389
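        /* Store first and update ESP afterwards, so that a faulting push
           leaves ESP unmodified (precise exceptions).  */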
2390    gen_op_st_v(s, d_ot, val, cpu_A0);
2391    gen_op_mov_reg_v(a_ot, R_ESP, new_esp);
2392}
2393
2394/* a two-step pop is necessary for precise exceptions: ESP is only
       updated after the load has succeeded */
2395static TCGMemOp gen_pop_T0(DisasContext *s)
2396{
2397    TCGMemOp d_ot = mo_pushpop(s, s->dflag);
2398    TCGv addr = cpu_A0;
2399
2400    if (CODE64(s)) {
2401        addr = cpu_regs[R_ESP];
2402    } else if (!s->ss32) {
2403        tcg_gen_ext16u_tl(cpu_A0, cpu_regs[R_ESP]);
2404        gen_op_addl_A0_seg(s, R_SS);
2405    } else if (s->addseg) {
2406        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_ESP]);
2407        gen_op_addl_A0_seg(s, R_SS);
2408    } else {
2409        tcg_gen_ext32u_tl(cpu_A0, cpu_regs[R_ESP]);
2410    }
2411
2412    gen_op_ld_v(s, d_ot, cpu_T[0], addr);
2413    return d_ot;
2414}
2415
2416static void gen_pop_update(DisasContext *s, TCGMemOp ot)
2417{
2418    gen_stack_update(s, 1 << ot);
2419}
2420
2421static void gen_stack_A0(DisasContext *s)
2422{
2423    gen_op_movl_A0_reg(R_ESP);
2424    if (!s->ss32)
2425        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2426    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2427    if (s->addseg)
2428        gen_op_addl_A0_seg(s, R_SS);
2429}
2430
2431/* NOTE: wrap-around in 16-bit mode is not fully handled */
2432static void gen_pusha(DisasContext *s)
2433{
2434    int i;
2435    gen_op_movl_A0_reg(R_ESP);
2436    gen_op_addl_A0_im(-8 << s->dflag);
2437    if (!s->ss32)
2438        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2439    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2440    if (s->addseg)
2441        gen_op_addl_A0_seg(s, R_SS);
2442    for (i = 0; i < 8; i++) {
2443        gen_op_mov_v_reg(MO_32, cpu_T[0], 7 - i);
2444        gen_op_st_v(s, s->dflag, cpu_T[0], cpu_A0);
2445        gen_op_addl_A0_im(1 << s->dflag);
2446    }
2447    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2448}
2449
2450/* NOTE: wrap-around in 16-bit mode is not fully handled */
2451static void gen_popa(DisasContext *s)
2452{
2453    int i;
2454    gen_op_movl_A0_reg(R_ESP);
2455    if (!s->ss32)
2456        tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2457    tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2458    tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 8 << s->dflag);
2459    if (s->addseg)
2460        gen_op_addl_A0_seg(s, R_SS);
2461    for (i = 0; i < 8; i++) {
2462        /* ESP is not reloaded */
2463        if (i != 3) {
2464            gen_op_ld_v(s, s->dflag, cpu_T[0], cpu_A0);
2465            gen_op_mov_reg_v(s->dflag, 7 - i, cpu_T[0]);
2466        }
2467        gen_op_addl_A0_im(1 << s->dflag);
2468    }
2469    gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2470}
2471
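    /* ENTER: push EBP, optionally copy 'level' nested frame pointers via a
       helper, point EBP at the saved value and then reserve esp_addend
       bytes of stack frame.  */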
2472static void gen_enter(DisasContext *s, int esp_addend, int level)
2473{
2474    TCGMemOp ot = mo_pushpop(s, s->dflag);
2475    int opsize = 1 << ot;
2476
2477    level &= 0x1f;
2478#ifdef TARGET_X86_64
2479    if (CODE64(s)) {
2480        gen_op_movl_A0_reg(R_ESP);
2481        gen_op_addq_A0_im(-opsize);
2482        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2483
2484        /* push bp */
2485        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
2486        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2487        if (level) {
2488            /* XXX: must save state */
2489            gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
2490                                     tcg_const_i32((ot == MO_64)),
2491                                     cpu_T[1]);
2492        }
2493        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
2494        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
2495        gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[1]);
2496    } else
2497#endif
2498    {
2499        gen_op_movl_A0_reg(R_ESP);
2500        gen_op_addl_A0_im(-opsize);
2501        if (!s->ss32)
2502            tcg_gen_ext16u_tl(cpu_A0, cpu_A0);
2503        tcg_gen_mov_tl(cpu_T[1], cpu_A0);
2504        if (s->addseg)
2505            gen_op_addl_A0_seg(s, R_SS);
2506        /* push bp */
2507        gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
2508        gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
2509        if (level) {
2510            /* XXX: must save state */
2511            gen_helper_enter_level(cpu_env, tcg_const_i32(level),
2512                                   tcg_const_i32(s->dflag - 1),
2513                                   cpu_T[1]);
2514        }
2515        gen_op_mov_reg_v(ot, R_EBP, cpu_T[1]);
2516        tcg_gen_addi_tl(cpu_T[1], cpu_T[1], -esp_addend + (-opsize * level));
2517        gen_op_mov_reg_v(MO_16 + s->ss32, R_ESP, cpu_T[1]);
2518    }
2519}
2520
2521static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
2522{
2523    gen_update_cc_op(s);
2524    gen_jmp_im(cur_eip);
2525    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
2526    s->is_jmp = DISAS_TB_JUMP;
2527}
2528
2529/* an interrupt is different from an exception because of the
2530   privilege checks */
2531static void gen_interrupt(DisasContext *s, int intno,
2532                          target_ulong cur_eip, target_ulong next_eip)
2533{
2534    gen_update_cc_op(s);
2535    gen_jmp_im(cur_eip);
2536    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2537                               tcg_const_i32(next_eip - cur_eip));
2538    s->is_jmp = DISAS_TB_JUMP;
2539}
2540
2541static void gen_debug(DisasContext *s, target_ulong cur_eip)
2542{
2543    gen_update_cc_op(s);
2544    gen_jmp_im(cur_eip);
2545    gen_helper_debug(cpu_env);
2546    s->is_jmp = DISAS_TB_JUMP;
2547}
2548
2549/* generate a generic end of block; a trace exception is also
2550   generated if needed */
2551static void gen_eob(DisasContext *s)
2552{
2553    gen_update_cc_op(s);
2554    if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
2555        gen_helper_reset_inhibit_irq(cpu_env);
2556    }
2557    if (s->tb->flags & HF_RF_MASK) {
2558        gen_helper_reset_rf(cpu_env);
2559    }
2560    if (s->singlestep_enabled) {
2561        gen_helper_debug(cpu_env);
2562    } else if (s->tf) {
2563        gen_helper_single_step(cpu_env);
2564    } else {
2565        tcg_gen_exit_tb(0);
2566    }
2567    s->is_jmp = DISAS_TB_JUMP;
2568}
2569
2570/* generate a jump to eip. No segment change may happen before this,
2571   as a direct jump to the next block may occur */
2572static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2573{
2574    gen_update_cc_op(s);
2575    set_cc_op(s, CC_OP_DYNAMIC);
2576    if (s->jmp_opt) {
2577        gen_goto_tb(s, tb_num, eip);
2578        s->is_jmp = DISAS_TB_JUMP;
2579    } else {
2580        gen_jmp_im(eip);
2581        gen_eob(s);
2582    }
2583}
2584
2585static void gen_jmp(DisasContext *s, target_ulong eip)
2586{
2587    gen_jmp_tb(s, eip, 0);
2588}
2589
2590static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2591{
2592    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2593    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset);
2594}
2595
2596static inline void gen_stq_env_A0(DisasContext *s, int offset)
2597{
2598    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset);
2599    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
2600}
2601
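    /* Load/store a full 128-bit XMM value as two little-endian 64-bit
       halves at cpu_A0 and cpu_A0 + 8.  */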
2602static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2603{
2604    int mem_index = s->mem_index;
2605    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2606    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
2607    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2608    tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2609    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
2610}
2611
2612static inline void gen_sto_env_A0(DisasContext *s, int offset)
2613{
2614    int mem_index = s->mem_index;
2615    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(0)));
2616    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, mem_index, MO_LEQ);
2617    tcg_gen_addi_tl(cpu_tmp0, cpu_A0, 8);
2618    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, offset + offsetof(XMMReg, XMM_Q(1)));
2619    tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_tmp0, mem_index, MO_LEQ);
2620}
2621
2622static inline void gen_op_movo(int d_offset, int s_offset)
2623{
2624    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2625    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2626    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset + 8);
2627    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset + 8);
2628}
2629
2630static inline void gen_op_movq(int d_offset, int s_offset)
2631{
2632    tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env, s_offset);
2633    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2634}
2635
2636static inline void gen_op_movl(int d_offset, int s_offset)
2637{
2638    tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env, s_offset);
2639    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, d_offset);
2640}
2641
2642static inline void gen_op_movq_env_0(int d_offset)
2643{
2644    tcg_gen_movi_i64(cpu_tmp1_i64, 0);
2645    tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
2646}
2647
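    /* SSE helper signatures: the character after SSEFunc_ is the return
       type (0 = void, i = i32, l = i64) and the remaining characters
       encode the arguments: e = env, p = register pointer, i = i32,
       l = i64, t = target_ulong.  */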
2648typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2649typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2650typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2651typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2652typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2653typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2654                               TCGv_i32 val);
2655typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2656typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2657                               TCGv val);
2658
2659#define SSE_SPECIAL ((void *)1)
2660#define SSE_DUMMY ((void *)2)
2661
2662#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2663#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2664                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2665
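    /* Rows are indexed by the second opcode byte, columns by the mandatory
       prefix: 0 = none, 1 = 0x66, 2 = 0xF3, 3 = 0xF2.  */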
2666static const SSEFunc_0_epp sse_op_table1[256][4] = {
2667    /* 3DNow! extensions */
2668    [0x0e] = { SSE_DUMMY }, /* femms */
2669    [0x0f] = { SSE_DUMMY }, /* pf... */
2670    /* pure SSE operations */
2671    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2672    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2673    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2674    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2675    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2676    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2677    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2678    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2679
2680    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2681    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2682    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2683    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2684    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2685    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2686    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2687    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2688    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2689    [0x51] = SSE_FOP(sqrt),
2690    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2691    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2692    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2693    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2694    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2695    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2696    [0x58] = SSE_FOP(add),
2697    [0x59] = SSE_FOP(mul),
2698    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2699               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2700    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2701    [0x5c] = SSE_FOP(sub),
2702    [0x5d] = SSE_FOP(min),
2703    [0x5e] = SSE_FOP(div),
2704    [0x5f] = SSE_FOP(max),
2705
2706    [0xc2] = SSE_FOP(cmpeq),
2707    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2708               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2709
2710    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2711    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2712    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2713
2714    /* MMX ops and their SSE extensions */
2715    [0x60] = MMX_OP2(punpcklbw),
2716    [0x61] = MMX_OP2(punpcklwd),
2717    [0x62] = MMX_OP2(punpckldq),
2718    [0x63] = MMX_OP2(packsswb),
2719    [0x64] = MMX_OP2(pcmpgtb),
2720    [0x65] = MMX_OP2(pcmpgtw),
2721    [0x66] = MMX_OP2(pcmpgtl),
2722    [0x67] = MMX_OP2(packuswb),
2723    [0x68] = MMX_OP2(punpckhbw),
2724    [0x69] = MMX_OP2(punpckhwd),
2725    [0x6a] = MMX_OP2(punpckhdq),
2726    [0x6b] = MMX_OP2(packssdw),
2727    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2728    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2729    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2730    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2731    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2732               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2733               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2734               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2735    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2736    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2737    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2738    [0x74] = MMX_OP2(pcmpeqb),
2739    [0x75] = MMX_OP2(pcmpeqw),
2740    [0x76] = MMX_OP2(pcmpeql),
2741    [0x77] = { SSE_DUMMY }, /* emms */
2742    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2743    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2744    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2745    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2746    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2747    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2748    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2749    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2750    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2751    [0xd1] = MMX_OP2(psrlw),
2752    [0xd2] = MMX_OP2(psrld),
2753    [0xd3] = MMX_OP2(psrlq),
2754    [0xd4] = MMX_OP2(paddq),
2755    [0xd5] = MMX_OP2(pmullw),
2756    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2757    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2758    [0xd8] = MMX_OP2(psubusb),
2759    [0xd9] = MMX_OP2(psubusw),
2760    [0xda] = MMX_OP2(pminub),
2761    [0xdb] = MMX_OP2(pand),
2762    [0xdc] = MMX_OP2(paddusb),
2763    [0xdd] = MMX_OP2(paddusw),
2764    [0xde] = MMX_OP2(pmaxub),
2765    [0xdf] = MMX_OP2(pandn),
2766    [0xe0] = MMX_OP2(pavgb),
2767    [0xe1] = MMX_OP2(psraw),
2768    [0xe2] = MMX_OP2(psrad),
2769    [0xe3] = MMX_OP2(pavgw),
2770    [0xe4] = MMX_OP2(pmulhuw),
2771    [0xe5] = MMX_OP2(pmulhw),
2772    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2773    [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2774    [0xe8] = MMX_OP2(psubsb),
2775    [0xe9] = MMX_OP2(psubsw),
2776    [0xea] = MMX_OP2(pminsw),
2777    [0xeb] = MMX_OP2(por),
2778    [0xec] = MMX_OP2(paddsb),
2779    [0xed] = MMX_OP2(paddsw),
2780    [0xee] = MMX_OP2(pmaxsw),
2781    [0xef] = MMX_OP2(pxor),
2782    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2783    [0xf1] = MMX_OP2(psllw),
2784    [0xf2] = MMX_OP2(pslld),
2785    [0xf3] = MMX_OP2(psllq),
2786    [0xf4] = MMX_OP2(pmuludq),
2787    [0xf5] = MMX_OP2(pmaddwd),
2788    [0xf6] = MMX_OP2(psadbw),
2789    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2790               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2791    [0xf8] = MMX_OP2(psubb),
2792    [0xf9] = MMX_OP2(psubw),
2793    [0xfa] = MMX_OP2(psubl),
2794    [0xfb] = MMX_OP2(psubq),
2795    [0xfc] = MMX_OP2(paddb),
2796    [0xfd] = MMX_OP2(paddw),
2797    [0xfe] = MMX_OP2(paddl),
2798};
2799
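    /* Shift-by-immediate group (opcodes 0F 71/72/73), indexed by
       8 * (opcode - 0x71) + the reg field of the ModRM byte; column 0 is
       the MMX form and column 1 the SSE form.  */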
2800static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2801    [0 + 2] = MMX_OP2(psrlw),
2802    [0 + 4] = MMX_OP2(psraw),
2803    [0 + 6] = MMX_OP2(psllw),
2804    [8 + 2] = MMX_OP2(psrld),
2805    [8 + 4] = MMX_OP2(psrad),
2806    [8 + 6] = MMX_OP2(pslld),
2807    [16 + 2] = MMX_OP2(psrlq),
2808    [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2809    [16 + 6] = MMX_OP2(psllq),
2810    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2811};
2812
2813static const SSEFunc_0_epi sse_op_table3ai[] = {
2814    gen_helper_cvtsi2ss,
2815    gen_helper_cvtsi2sd
2816};
2817
2818#ifdef TARGET_X86_64
2819static const SSEFunc_0_epl sse_op_table3aq[] = {
2820    gen_helper_cvtsq2ss,
2821    gen_helper_cvtsq2sd
2822};
2823#endif
2824
2825static const SSEFunc_i_ep sse_op_table3bi[] = {
2826    gen_helper_cvttss2si,
2827    gen_helper_cvtss2si,
2828    gen_helper_cvttsd2si,
2829    gen_helper_cvtsd2si
2830};
2831
2832#ifdef TARGET_X86_64
2833static const SSEFunc_l_ep sse_op_table3bq[] = {
2834    gen_helper_cvttss2sq,
2835    gen_helper_cvtss2sq,
2836    gen_helper_cvttsd2sq,
2837    gen_helper_cvtsd2sq
2838};
2839#endif
2840
2841static const SSEFunc_0_epp sse_op_table4[8][4] = {
2842    SSE_FOP(cmpeq),
2843    SSE_FOP(cmplt),
2844    SSE_FOP(cmple),
2845    SSE_FOP(cmpunord),
2846    SSE_FOP(cmpneq),
2847    SSE_FOP(cmpnlt),
2848    SSE_FOP(cmpnle),
2849    SSE_FOP(cmpord),
2850};
2851
2852static const SSEFunc_0_epp sse_op_table5[256] = {
2853    [0x0c] = gen_helper_pi2fw,
2854    [0x0d] = gen_helper_pi2fd,
2855    [0x1c] = gen_helper_pf2iw,
2856    [0x1d] = gen_helper_pf2id,
2857    [0x8a] = gen_helper_pfnacc,
2858    [0x8e] = gen_helper_pfpnacc,
2859    [0x90] = gen_helper_pfcmpge,
2860    [0x94] = gen_helper_pfmin,
2861    [0x96] = gen_helper_pfrcp,
2862    [0x97] = gen_helper_pfrsqrt,
2863    [0x9a] = gen_helper_pfsub,
2864    [0x9e] = gen_helper_pfadd,
2865    [0xa0] = gen_helper_pfcmpgt,
2866    [0xa4] = gen_helper_pfmax,
2867    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2868    [0xa7] = gen_helper_movq, /* pfrsqit1 */
2869    [0xaa] = gen_helper_pfsubr,
2870    [0xae] = gen_helper_pfacc,
2871    [0xb0] = gen_helper_pfcmpeq,
2872    [0xb4] = gen_helper_pfmul,
2873    [0xb6] = gen_helper_movq, /* pfrcpit2 */
2874    [0xb7] = gen_helper_pmulhrw_mmx,
2875    [0xbb] = gen_helper_pswapd,
2876    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2877};
2878
2879struct SSEOpHelper_epp {
2880    SSEFunc_0_epp op[2];
2881    uint32_t ext_mask;
2882};
2883
2884struct SSEOpHelper_eppi {
2885    SSEFunc_0_eppi op[2];
2886    uint32_t ext_mask;
2887};
2888
2889#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2890#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2891#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2892#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2893#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2894        CPUID_EXT_PCLMULQDQ }
2895#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2896
2897static const struct SSEOpHelper_epp sse_op_table6[256] = {
2898    [0x00] = SSSE3_OP(pshufb),
2899    [0x01] = SSSE3_OP(phaddw),
2900    [0x02] = SSSE3_OP(phaddd),
2901    [0x03] = SSSE3_OP(phaddsw),
2902    [0x04] = SSSE3_OP(pmaddubsw),
2903    [0x05] = SSSE3_OP(phsubw),
2904    [0x06] = SSSE3_OP(phsubd),
2905    [0x07] = SSSE3_OP(phsubsw),
2906    [0x08] = SSSE3_OP(psignb),
2907    [0x09] = SSSE3_OP(psignw),
2908    [0x0a] = SSSE3_OP(psignd),
2909    [0x0b] = SSSE3_OP(pmulhrsw),
2910    [0x10] = SSE41_OP(pblendvb),
2911    [0x14] = SSE41_OP(blendvps),
2912    [0x15] = SSE41_OP(blendvpd),
2913    [0x17] = SSE41_OP(ptest),
2914    [0x1c] = SSSE3_OP(pabsb),
2915    [0x1d] = SSSE3_OP(pabsw),
2916    [0x1e] = SSSE3_OP(pabsd),
2917    [0x20] = SSE41_OP(pmovsxbw),
2918    [0x21] = SSE41_OP(pmovsxbd),
2919    [0x22] = SSE41_OP(pmovsxbq),
2920    [0x23] = SSE41_OP(pmovsxwd),
2921    [0x24] = SSE41_OP(pmovsxwq),
2922    [0x25] = SSE41_OP(pmovsxdq),
2923    [0x28] = SSE41_OP(pmuldq),
2924    [0x29] = SSE41_OP(pcmpeqq),
2925    [0x2a] = SSE41_SPECIAL, /* movntdqa */
2926    [0x2b] = SSE41_OP(packusdw),
2927    [0x30] = SSE41_OP(pmovzxbw),
2928    [0x31] = SSE41_OP(pmovzxbd),
2929    [0x32] = SSE41_OP(pmovzxbq),
2930    [0x33] = SSE41_OP(pmovzxwd),
2931    [0x34] = SSE41_OP(pmovzxwq),
2932    [0x35] = SSE41_OP(pmovzxdq),
2933    [0x37] = SSE42_OP(pcmpgtq),
2934    [0x38] = SSE41_OP(pminsb),
2935    [0x39] = SSE41_OP(pminsd),
2936    [0x3a] = SSE41_OP(pminuw),
2937    [0x3b] = SSE41_OP(pminud),
2938    [0x3c] = SSE41_OP(pmaxsb),
2939    [0x3d] = SSE41_OP(pmaxsd),
2940    [0x3e] = SSE41_OP(pmaxuw),
2941    [0x3f] = SSE41_OP(pmaxud),
2942    [0x40] = SSE41_OP(pmulld),
2943    [0x41] = SSE41_OP(phminposuw),
2944    [0xdb] = AESNI_OP(aesimc),
2945    [0xdc] = AESNI_OP(aesenc),
2946    [0xdd] = AESNI_OP(aesenclast),
2947    [0xde] = AESNI_OP(aesdec),
2948    [0xdf] = AESNI_OP(aesdeclast),
2949};
2950
2951static const struct SSEOpHelper_eppi sse_op_table7[256] = {
2952    [0x08] = SSE41_OP(roundps),
2953    [0x09] = SSE41_OP(roundpd),
2954    [0x0a] = SSE41_OP(roundss),
2955    [0x0b] = SSE41_OP(roundsd),
2956    [0x0c] = SSE41_OP(blendps),
2957    [0x0d] = SSE41_OP(blendpd),
2958    [0x0e] = SSE41_OP(pblendw),
2959    [0x0f] = SSSE3_OP(palignr),
2960    [0x14] = SSE41_SPECIAL, /* pextrb */
2961    [0x15] = SSE41_SPECIAL, /* pextrw */
2962    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
2963    [0x17] = SSE41_SPECIAL, /* extractps */
2964    [0x20] = SSE41_SPECIAL, /* pinsrb */
2965    [0x21] = SSE41_SPECIAL, /* insertps */
2966    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
2967    [0x40] = SSE41_OP(dpps),
2968    [0x41] = SSE41_OP(dppd),
2969    [0x42] = SSE41_OP(mpsadbw),
2970    [0x44] = PCLMULQDQ_OP(pclmulqdq),
2971    [0x60] = SSE42_OP(pcmpestrm),
2972    [0x61] = SSE42_OP(pcmpestri),
2973    [0x62] = SSE42_OP(pcmpistrm),
2974    [0x63] = SSE42_OP(pcmpistri),
2975    [0xdf] = AESNI_OP(aeskeygenassist),
2976};
2977
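    /* Translate one MMX/SSE instruction.  'b' is the opcode byte following
       0x0f; the mandatory prefix selects the column in the op tables and
       SSE_SPECIAL entries are handled by the switch below.  */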
2978static void gen_sse(CPUX86State *env, DisasContext *s, int b,
2979                    target_ulong pc_start, int rex_r)
2980{
2981    int b1, op1_offset, op2_offset, is_xmm, val;
2982    int modrm, mod, rm, reg;
2983    SSEFunc_0_epp sse_fn_epp;
2984    SSEFunc_0_eppi sse_fn_eppi;
2985    SSEFunc_0_ppi sse_fn_ppi;
2986    SSEFunc_0_eppt sse_fn_eppt;
2987    TCGMemOp ot;
2988
2989    b &= 0xff;
2990    if (s->prefix & PREFIX_DATA)
2991        b1 = 1;
2992    else if (s->prefix & PREFIX_REPZ)
2993        b1 = 2;
2994    else if (s->prefix & PREFIX_REPNZ)
2995        b1 = 3;
2996    else
2997        b1 = 0;
2998    sse_fn_epp = sse_op_table1[b][b1];
2999    if (!sse_fn_epp) {
3000        goto illegal_op;
3001    }
3002    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3003        is_xmm = 1;
3004    } else {
3005        if (b1 == 0) {
3006            /* MMX case */
3007            is_xmm = 0;
3008        } else {
3009            is_xmm = 1;
3010        }
3011    }
3012    /* simple MMX/SSE operation */
3013    if (s->flags & HF_TS_MASK) {
3014        gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3015        return;
3016    }
3017    if (s->flags & HF_EM_MASK) {
3018    illegal_op:
3019        gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
3020        return;
3021    }
3022    if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
3023        if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))
3024            goto illegal_op;
3025    if (b == 0x0e) {
3026        if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
3027            goto illegal_op;
3028        /* femms */
3029        gen_helper_emms(cpu_env);
3030        return;
3031    }
3032    if (b == 0x77) {
3033        /* emms */
3034        gen_helper_emms(cpu_env);
3035        return;
3036    }
3037    /* prepare MMX state (XXX: optimize by storing fpstt and fptags in
3038       the static cpu state) */
3039    if (!is_xmm) {
3040        gen_helper_enter_mmx(cpu_env);
3041    }
3042
3043    modrm = cpu_ldub_code(env, s->pc++);
3044    reg = ((modrm >> 3) & 7);
3045    if (is_xmm)
3046        reg |= rex_r;
3047    mod = (modrm >> 6) & 3;
3048    if (sse_fn_epp == SSE_SPECIAL) {
3049        b |= (b1 << 8);
3050        switch(b) {
3051        case 0x0e7: /* movntq */
3052            if (mod == 3)
3053                goto illegal_op;
3054            gen_lea_modrm(env, s, modrm);
3055            gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3056            break;
3057        case 0x1e7: /* movntdq */
3058        case 0x02b: /* movntps */
3059        case 0x12b: /* movntpd */
3060            if (mod == 3)
3061                goto illegal_op;
3062            gen_lea_modrm(env, s, modrm);
3063            gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3064            break;
3065        case 0x3f0: /* lddqu */
3066            if (mod == 3)
3067                goto illegal_op;
3068            gen_lea_modrm(env, s, modrm);
3069            gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3070            break;
3071        case 0x22b: /* movntss */
3072        case 0x32b: /* movntsd */
3073            if (mod == 3)
3074                goto illegal_op;
3075            gen_lea_modrm(env, s, modrm);
3076            if (b1 & 1) {
3077                gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3078            } else {
3079                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
3080                    xmm_regs[reg].XMM_L(0)));
3081                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
3082            }
3083            break;
3084        case 0x6e: /* movd mm, ea */
3085#ifdef TARGET_X86_64
3086            if (s->dflag == MO_64) {
3087                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3088                tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
3089            } else
3090#endif
3091            {
3092                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3093                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3094                                 offsetof(CPUX86State,fpregs[reg].mmx));
3095                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3096                gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
3097            }
3098            break;
3099        case 0x16e: /* movd xmm, ea */
3100#ifdef TARGET_X86_64
3101            if (s->dflag == MO_64) {
3102                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3103                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3104                                 offsetof(CPUX86State,xmm_regs[reg]));
3105                gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
3106            } else
3107#endif
3108            {
3109                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3110                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3111                                 offsetof(CPUX86State,xmm_regs[reg]));
3112                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3113                gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
3114            }
3115            break;
3116        case 0x6f: /* movq mm, ea */
3117            if (mod != 3) {
3118                gen_lea_modrm(env, s, modrm);
3119                gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3120            } else {
3121                rm = (modrm & 7);
3122                tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
3123                               offsetof(CPUX86State,fpregs[rm].mmx));
3124                tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
3125                               offsetof(CPUX86State,fpregs[reg].mmx));
3126            }
3127            break;
3128        case 0x010: /* movups */
3129        case 0x110: /* movupd */
3130        case 0x028: /* movaps */
3131        case 0x128: /* movapd */
3132        case 0x16f: /* movdqa xmm, ea */
3133        case 0x26f: /* movdqu xmm, ea */
3134            if (mod != 3) {
3135                gen_lea_modrm(env, s, modrm);
3136                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3137            } else {
3138                rm = (modrm & 7) | REX_B(s);
3139                gen_op_movo(offsetof(CPUX86State,xmm_regs[reg]),
3140                            offsetof(CPUX86State,xmm_regs[rm]));
3141            }
3142            break;
3143        case 0x210: /* movss xmm, ea */
3144            if (mod != 3) {
3145                gen_lea_modrm(env, s, modrm);
3146                gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
3147                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3148                tcg_gen_movi_tl(cpu_T[0], 0);
3149                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3150                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3151                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3152            } else {
3153                rm = (modrm & 7) | REX_B(s);
3154                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3155                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3156            }
3157            break;
3158        case 0x310: /* movsd xmm, ea */
3159            if (mod != 3) {
3160                gen_lea_modrm(env, s, modrm);
3161                gen_ldq_env_A0(s, offsetof(CPUX86State,
3162                                           xmm_regs[reg].XMM_Q(0)));
3163                tcg_gen_movi_tl(cpu_T[0], 0);
3164                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3165                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3166            } else {
3167                rm = (modrm & 7) | REX_B(s);
3168                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3169                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3170            }
3171            break;
3172        case 0x012: /* movlps */
3173        case 0x112: /* movlpd */
3174            if (mod != 3) {
3175                gen_lea_modrm(env, s, modrm);
3176                gen_ldq_env_A0(s, offsetof(CPUX86State,
3177                                           xmm_regs[reg].XMM_Q(0)));
3178            } else {
3179                /* movhlps */
3180                rm = (modrm & 7) | REX_B(s);
3181                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3182                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3183            }
3184            break;
3185        case 0x212: /* movsldup */
3186            if (mod != 3) {
3187                gen_lea_modrm(env, s, modrm);
3188                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3189            } else {
3190                rm = (modrm & 7) | REX_B(s);
3191                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3192                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)));
3193                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3194                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(2)));
3195            }
3196            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3197                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3198            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3199                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
3200            break;
3201        case 0x312: /* movddup */
3202            if (mod != 3) {
3203                gen_lea_modrm(env, s, modrm);
3204                gen_ldq_env_A0(s, offsetof(CPUX86State,
3205                                           xmm_regs[reg].XMM_Q(0)));
3206            } else {
3207                rm = (modrm & 7) | REX_B(s);
3208                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3209                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3210            }
3211            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3212                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3213            break;
3214        case 0x016: /* movhps */
3215        case 0x116: /* movhpd */
3216            if (mod != 3) {
3217                gen_lea_modrm(env, s, modrm);
3218                gen_ldq_env_A0(s, offsetof(CPUX86State,
3219                                           xmm_regs[reg].XMM_Q(1)));
3220            } else {
3221                /* movlhps */
3222                rm = (modrm & 7) | REX_B(s);
3223                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)),
3224                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3225            }
3226            break;
3227        case 0x216: /* movshdup */
3228            if (mod != 3) {
3229                gen_lea_modrm(env, s, modrm);
3230                gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3231            } else {
3232                rm = (modrm & 7) | REX_B(s);
3233                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)),
3234                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(1)));
3235                gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)),
3236                            offsetof(CPUX86State,xmm_regs[rm].XMM_L(3)));
3237            }
3238            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)),
3239                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(1)));
3240            gen_op_movl(offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)),
3241                        offsetof(CPUX86State,xmm_regs[reg].XMM_L(3)));
3242            break;
3243        case 0x178:
3244        case 0x378:
3245            {
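                    /* SSE4a extrq_i (66 prefix) / insertq_i (F2 prefix):
                       two immediate bytes, field length then bit index,
                       follow the ModRM byte.  */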
3246                int bit_index, field_length;
3247
3248                if (b1 == 1 && reg != 0)
3249                    goto illegal_op;
3250                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
3251                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
3252                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
3253                    offsetof(CPUX86State,xmm_regs[reg]));
3254                if (b1 == 1)
3255                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
3256                                       tcg_const_i32(bit_index),
3257                                       tcg_const_i32(field_length));
3258                else
3259                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
3260                                         tcg_const_i32(bit_index),
3261                                         tcg_const_i32(field_length));
3262            }
3263            break;
3264        case 0x7e: /* movd ea, mm */
3265#ifdef TARGET_X86_64
3266            if (s->dflag == MO_64) {
3267                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3268                               offsetof(CPUX86State,fpregs[reg].mmx));
3269                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3270            } else
3271#endif
3272            {
3273                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3274                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3275                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3276            }
3277            break;
3278        case 0x17e: /* movd ea, xmm */
3279#ifdef TARGET_X86_64
3280            if (s->dflag == MO_64) {
3281                tcg_gen_ld_i64(cpu_T[0], cpu_env, 
3282                               offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3283                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3284            } else
3285#endif
3286            {
3287                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
3288                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3289                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3290            }
3291            break;
3292        case 0x27e: /* movq xmm, ea */
3293            if (mod != 3) {
3294                gen_lea_modrm(env, s, modrm);
3295                gen_ldq_env_A0(s, offsetof(CPUX86State,
3296                                           xmm_regs[reg].XMM_Q(0)));
3297            } else {
3298                rm = (modrm & 7) | REX_B(s);
3299                gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3300                            offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3301            }
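                /* movq to an xmm register zero-extends: clear the high
                   quadword of the destination.  */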
3302            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3303            break;
3304        case 0x7f: /* movq ea, mm */
3305            if (mod != 3) {
3306                gen_lea_modrm(env, s, modrm);
3307                gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3308            } else {
3309                rm = (modrm & 7);
3310                gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
3311                            offsetof(CPUX86State,fpregs[reg].mmx));
3312            }
3313            break;
3314        case 0x011: /* movups */
3315        case 0x111: /* movupd */
3316        case 0x029: /* movaps */
3317        case 0x129: /* movapd */
3318        case 0x17f: /* movdqa ea, xmm */
3319        case 0x27f: /* movdqu ea, xmm */
3320            if (mod != 3) {
3321                gen_lea_modrm(env, s, modrm);
3322                gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3323            } else {
3324                rm = (modrm & 7) | REX_B(s);
3325                gen_op_movo(offsetof(CPUX86State,xmm_regs[rm]),
3326                            offsetof(CPUX86State,xmm_regs[reg]));
3327            }
3328            break;
3329        case 0x211: /* movss ea, xmm */
3330            if (mod != 3) {
3331                gen_lea_modrm(env, s, modrm);
3332                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3333                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
3334            } else {
3335                rm = (modrm & 7) | REX_B(s);
3336                gen_op_movl(offsetof(CPUX86State,xmm_regs[rm].XMM_L(0)),
3337                            offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
3338            }
3339            break;
3340        case 0x311: /* movsd ea, xmm */
3341            if (mod != 3) {
3342                gen_lea_modrm(env, s, modrm);
3343                gen_stq_env_A0(s, offsetof(CPUX86State,
3344                                           xmm_regs[reg].XMM_Q(0)));
3345            } else {
3346                rm = (modrm & 7) | REX_B(s);
3347                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
3348                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3349            }
3350            break;
3351        case 0x013: /* movlps */
3352        case 0x113: /* movlpd */
3353            if (mod != 3) {
3354                gen_lea_modrm(env, s, modrm);
3355                gen_stq_env_A0(s, offsetof(CPUX86State,
3356                                           xmm_regs[reg].XMM_Q(0)));
3357            } else {
3358                goto illegal_op;
3359            }
3360            break;
3361        case 0x017: /* movhps */
3362        case 0x117: /* movhpd */
3363            if (mod != 3) {
3364                gen_lea_modrm(env, s, modrm);
3365                gen_stq_env_A0(s, offsetof(CPUX86State,
3366                                           xmm_regs[reg].XMM_Q(1)));
3367            } else {
3368                goto illegal_op;
3369            }
3370            break;
3371        case 0x71: /* shift mm, im */
3372        case 0x72:
3373        case 0x73:
3374        case 0x171: /* shift xmm, im */
3375        case 0x172:
3376        case 0x173:
3377            if (b1 >= 2) {
3378                goto illegal_op;
3379            }
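                /* Build the immediate shift count into a temporary MMX/XMM
                   value so that the ordinary two-operand shift helpers can
                   be reused with it as the second source.  */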
3380            val = cpu_ldub_code(env, s->pc++);
3381            if (is_xmm) {
3382                tcg_gen_movi_tl(cpu_T[0], val);
3383                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3384                tcg_gen_movi_tl(cpu_T[0], 0);
3385                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(1)));
3386                op1_offset = offsetof(CPUX86State,xmm_t0);
3387            } else {
3388                tcg_gen_movi_tl(cpu_T[0], val);
3389                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(0)));
3390                tcg_gen_movi_tl(cpu_T[0], 0);
3391                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
3392                op1_offset = offsetof(CPUX86State,mmx_t0);
3393            }
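                /* sse_op_table2 rows are (group, /r) pairs: (b - 1) & 3
                   maps opcodes 71/72/73 to groups 0/1/2, and the modrm reg
                   field selects the shift within the group.  */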
3394            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3395                                       ((modrm >> 3) & 7)][b1];
3396            if (!sse_fn_epp) {
3397                goto illegal_op;
3398            }
3399            if (is_xmm) {
3400                rm = (modrm & 7) | REX_B(s);
3401                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3402            } else {
3403                rm = (modrm & 7);
3404                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3405            }
3406            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
3407            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
3408            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3409            break;
3410        case 0x050: /* movmskps */
3411            rm = (modrm & 7) | REX_B(s);
3412            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3413                             offsetof(CPUX86State,xmm_regs[rm]));
3414            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3415            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3416            break;
3417        case 0x150: /* movmskpd */
3418            rm = (modrm & 7) | REX_B(s);
3419            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
3420                             offsetof(CPUX86State,xmm_regs[rm]));
3421            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3422            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3423            break;
3424        case 0x02a: /* cvtpi2ps */
3425        case 0x12a: /* cvtpi2pd */
3426            gen_helper_enter_mmx(cpu_env);
3427            if (mod != 3) {
3428                gen_lea_modrm(env, s, modrm);
3429                op2_offset = offsetof(CPUX86State,mmx_t0);
3430                gen_ldq_env_A0(s, op2_offset);
3431            } else {
3432                rm = (modrm & 7);
3433                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3434            }
3435            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3436            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3437            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3438            switch(b >> 8) {
3439            case 0x0:
3440                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
3441                break;
3442            default:
3443            case 0x1:
3444                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
3445                break;
3446            }
3447            break;
3448        case 0x22a: /* cvtsi2ss */
3449        case 0x32a: /* cvtsi2sd */
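            /* Bit 8 of b (the f3 vs f2 prefix) picks the ss or sd form;
               mo_64_32 picks a 32- or 64-bit integer source.  */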
3450            ot = mo_64_32(s->dflag);
3451            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3452            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3453            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3454            if (ot == MO_32) {
3455                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3456                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3457                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
3458            } else {
3459#ifdef TARGET_X86_64
3460                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3461                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T[0]);
3462#else
3463                goto illegal_op;
3464#endif
3465            }
3466            break;
3467        case 0x02c: /* cvttps2pi */
3468        case 0x12c: /* cvttpd2pi */
3469        case 0x02d: /* cvtps2pi */
3470        case 0x12d: /* cvtpd2pi */
3471            gen_helper_enter_mmx(cpu_env);
3472            if (mod != 3) {
3473                gen_lea_modrm(env, s, modrm);
3474                op2_offset = offsetof(CPUX86State,xmm_t0);
3475                gen_ldo_env_A0(s, op2_offset);
3476            } else {
3477                rm = (modrm & 7) | REX_B(s);
3478                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3479            }
3480            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3481            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3482            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3483            switch(b) {
3484            case 0x02c:
3485                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3486                break;
3487            case 0x12c:
3488                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3489                break;
3490            case 0x02d:
3491                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3492                break;
3493            case 0x12d:
3494                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
3495                break;
3496            }
3497            break;
3498        case 0x22c: /* cvttss2si */
3499        case 0x32c: /* cvttsd2si */
3500        case 0x22d: /* cvtss2si */
3501        case 0x32d: /* cvtsd2si */
3502            ot = mo_64_32(s->dflag);
3503            if (mod != 3) {
3504                gen_lea_modrm(env, s, modrm);
3505                if ((b >> 8) & 1) {
3506                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
3507                } else {
3508                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
3509                    tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
3510                }
3511                op2_offset = offsetof(CPUX86State,xmm_t0);
3512            } else {
3513                rm = (modrm & 7) | REX_B(s);
3514                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3515            }
3516            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
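            /* Index into sse_op_table3b: bit 0 of the opcode separates the
               truncating (2c) from the current-rounding (2d) forms, and
               bit 8 (via (b >> 7) & 2) separates ss from sd.  */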
3517            if (ot == MO_32) {
3518                SSEFunc_i_ep sse_fn_i_ep =
3519                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3520                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3521                tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
3522            } else {
3523#ifdef TARGET_X86_64
3524                SSEFunc_l_ep sse_fn_l_ep =
3525                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3526                sse_fn_l_ep(cpu_T[0], cpu_env, cpu_ptr0);
3527#else
3528                goto illegal_op;
3529#endif
3530            }
3531            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3532            break;
3533        case 0xc4: /* pinsrw */
3534        case 0x1c4:
3535            s->rip_offset = 1;
3536            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3537            val = cpu_ldub_code(env, s->pc++);
3538            if (b1) {
3539                val &= 7;
3540                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3541                                offsetof(CPUX86State,xmm_regs[reg].XMM_W(val)));
3542            } else {
3543                val &= 3;
3544                tcg_gen_st16_tl(cpu_T[0], cpu_env,
3545                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3546            }
3547            break;
3548        case 0xc5: /* pextrw */
3549        case 0x1c5:
3550            if (mod != 3)
3551                goto illegal_op;
3552            ot = mo_64_32(s->dflag);
3553            val = cpu_ldub_code(env, s->pc++);
3554            if (b1) {
3555                val &= 7;
3556                rm = (modrm & 7) | REX_B(s);
3557                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3558                                 offsetof(CPUX86State,xmm_regs[rm].XMM_W(val)));
3559            } else {
3560                val &= 3;
3561                rm = (modrm & 7);
3562                tcg_gen_ld16u_tl(cpu_T[0], cpu_env,
3563                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3564            }
3565            reg = ((modrm >> 3) & 7) | rex_r;
3566            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3567            break;
3568        case 0x1d6: /* movq ea, xmm */
3569            if (mod != 3) {
3570                gen_lea_modrm(env, s, modrm);
3571                gen_stq_env_A0(s, offsetof(CPUX86State,
3572                                           xmm_regs[reg].XMM_Q(0)));
3573            } else {
3574                rm = (modrm & 7) | REX_B(s);
3575                gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
3576                            offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
3577                gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
3578            }
3579            break;
3580        case 0x2d6: /* movq2dq */
3581            gen_helper_enter_mmx(cpu_env);
3582            rm = (modrm & 7);
3583            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
3584                        offsetof(CPUX86State,fpregs[rm].mmx));
3585            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
3586            break;
3587        case 0x3d6: /* movdq2q */
3588            gen_helper_enter_mmx(cpu_env);
3589            rm = (modrm & 7) | REX_B(s);
3590            gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
3591                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
3592            break;
3593        case 0xd7: /* pmovmskb */
3594        case 0x1d7:
3595            if (mod != 3)
3596                goto illegal_op;
3597            if (b1) {
3598                rm = (modrm & 7) | REX_B(s);
3599                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
3600                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3601            } else {
3602                rm = (modrm & 7);
3603                tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
3604                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
3605            }
3606            reg = ((modrm >> 3) & 7) | rex_r;
3607            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
3608            break;
3609
3610        case 0x138:
3611        case 0x038:
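            /* Three-byte 0f 38 opcodes: the byte fetched as modrm above is
               really the third opcode byte.  Opcodes f0-ff are the integer
               extensions handled at do_0f_38_fx below.  */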
3612            b = modrm;
3613            if ((b & 0xf0) == 0xf0) {
3614                goto do_0f_38_fx;
3615            }
3616            modrm = cpu_ldub_code(env, s->pc++);
3617            rm = modrm & 7;
3618            reg = ((modrm >> 3) & 7) | rex_r;
3619            mod = (modrm >> 6) & 3;
3620            if (b1 >= 2) {
3621                goto illegal_op;
3622            }
3623
3624            sse_fn_epp = sse_op_table6[b].op[b1];
3625            if (!sse_fn_epp) {
3626                goto illegal_op;
3627            }
3628            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3629                goto illegal_op;
3630
3631            if (b1) {
3632                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3633                if (mod == 3) {
3634                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3635                } else {
3636                    op2_offset = offsetof(CPUX86State,xmm_t0);
3637                    gen_lea_modrm(env, s, modrm);
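                        /* The pmovsx/pmovzx widening forms read only the
                           low part of the memory operand: 64 bits for
                           bw/wd/dq, 32 for bd/wq, and 16 for bq.  */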
3638                    switch (b) {
3639                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3640                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3641                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3642                        gen_ldq_env_A0(s, op2_offset +
3643                                        offsetof(XMMReg, XMM_Q(0)));
3644                        break;
3645                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3646                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3647                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
3648                                            s->mem_index, MO_LEUL);
3649                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
3650                                        offsetof(XMMReg, XMM_L(0)));
3651                        break;
3652                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3653                        tcg_gen_qemu_ld_tl(cpu_tmp0, cpu_A0,
3654                                           s->mem_index, MO_LEUW);
3655                        tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
3656                                        offsetof(XMMReg, XMM_W(0)));
3657                        break;
3658                    case 0x2a:            /* movntdqa */
3659                        gen_ldo_env_A0(s, op1_offset);
3660                        return;
3661                    default:
3662                        gen_ldo_env_A0(s, op2_offset);
3663                    }
3664                }
3665            } else {
3666                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3667                if (mod == 3) {
3668                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3669                } else {
3670                    op2_offset = offsetof(CPUX86State,mmx_t0);
3671                    gen_lea_modrm(env, s, modrm);
3672                    gen_ldq_env_A0(s, op2_offset);
3673                }
3674            }
3675            if (sse_fn_epp == SSE_SPECIAL) {
3676                goto illegal_op;
3677            }
3678
3679            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
3680            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
3681            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
3682
3683            if (b == 0x17) {
3684                set_cc_op(s, CC_OP_EFLAGS);
3685            }
3686            break;
3687
3688        case 0x238:
3689        case 0x338:
3690        do_0f_38_fx:
3691            /* Various integer extensions at 0f 38 f[0-f].  */
3692            b = modrm | (b1 << 8);
3693            modrm = cpu_ldub_code(env, s->pc++);
3694            reg = ((modrm >> 3) & 7) | rex_r;
3695
3696            switch (b) {
3697            case 0x3f0: /* crc32 Gd,Eb */
3698            case 0x3f1: /* crc32 Gd,Ey */
3699            do_crc32:
3700                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3701                    goto illegal_op;
3702                }
3703                if ((b & 0xff) == 0xf0) {
3704                    ot = MO_8;
3705                } else if (s->dflag != MO_64) {
3706                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3707                } else {
3708                    ot = MO_64;
3709                }
3710
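                /* The helper folds 8 << ot bits (8/16/32/64) of the source
                   operand into the CRC-32C accumulated in the low 32 bits
                   of Gy.  */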
3711                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
3712                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3713                gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
3714                                 cpu_T[0], tcg_const_i32(8 << ot));
3715
3716                ot = mo_64_32(s->dflag);
3717                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3718                break;
3719
3720            case 0x1f0: /* crc32 or movbe */
3721            case 0x1f1:
3722                /* For these insns, the f2 prefix is supposed to have priority
3723                   over the 66 prefix, but that is not how b1 was computed
3724                   above; check for it explicitly.  */
3725                if (s->prefix & PREFIX_REPNZ) {
3726                    goto do_crc32;
3727                }
3728                /* FALLTHRU */
3729            case 0x0f0: /* movbe Gy,My */
3730            case 0x0f1: /* movbe My,Gy */
3731                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3732                    goto illegal_op;
3733                }
3734                if (s->dflag != MO_64) {
3735                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3736                } else {
3737                    ot = MO_64;
3738                }
3739
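                /* movbe is a byte-swapped load or store: tag the memory
                   access with MO_BE and let the TCG memory op perform the
                   swap.  */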
3740                gen_lea_modrm(env, s, modrm);
3741                if ((b & 1) == 0) {
3742                    tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
3743                                       s->mem_index, ot | MO_BE);
3744                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3745                } else {
3746                    tcg_gen_qemu_st_tl(cpu_regs[reg], cpu_A0,
3747                                       s->mem_index, ot | MO_BE);
3748                }
3749                break;
3750
3751            case 0x0f2: /* andn Gy, By, Ey */
3752                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3753                    || !(s->prefix & PREFIX_VEX)
3754                    || s->vex_l != 0) {
3755                    goto illegal_op;
3756                }
3757                ot = mo_64_32(s->dflag);
3758                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
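                /* andn: dest = Ey & ~By; tcg's andc inverts its second
                   operand.  */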
3759                tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_regs[s->vex_v]);
3760                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3761                gen_op_update1_cc();
3762                set_cc_op(s, CC_OP_LOGICB + ot);
3763                break;
3764
3765            case 0x0f7: /* bextr Gy, Ey, By */
3766                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3767                    || !(s->prefix & PREFIX_VEX)
3768                    || s->vex_l != 0) {
3769                    goto illegal_op;
3770                }
3771                ot = mo_64_32(s->dflag);
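                /* By[7:0] is the start bit and By[15:8] the field length,
                   e.g. By = 0x0408 extracts bits [11:8] of Ey.  */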
3772                {
3773                    TCGv bound, zero;
3774
3775                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3776                    /* Extract START, and shift the operand.
3777                       Shifts larger than operand size get zeros.  */
3778                    tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
3779                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0);
3780
3781                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3782                    zero = tcg_const_tl(0);
3783                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound,
3784                                       cpu_T[0], zero);
3785                    tcg_temp_free(zero);
3786
3787                    /* Extract the LEN into a mask.  Lengths larger than
3788                       operand size get all ones.  */
3789                    tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8);
3790                    tcg_gen_ext8u_tl(cpu_A0, cpu_A0);
3791                    tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound,
3792                                       cpu_A0, bound);
3793                    tcg_temp_free(bound);
3794                    tcg_gen_movi_tl(cpu_T[1], 1);
3795                    tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0);
3796                    tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1);
3797                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
3798
3799                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3800                    gen_op_update1_cc();
3801                    set_cc_op(s, CC_OP_LOGICB + ot);
3802                }
3803                break;
3804
3805            case 0x0f5: /* bzhi Gy, Ey, By */
3806                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3807                    || !(s->prefix & PREFIX_VEX)
3808                    || s->vex_l != 0) {
3809                    goto illegal_op;
3810                }
3811                ot = mo_64_32(s->dflag);
3812                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3813                tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
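                /* bzhi: clamp the index, then dest = Ey & ~(-1 << index),
                   clearing every bit at or above the index.  */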
3814                {
3815                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3816                    /* Note that since we're using BMILG (in order to get O
3817                       cleared) we need to store the inverse into C.  */
3818                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3819                                       cpu_T[1], bound);
3820                    tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1],
3821                                       bound, bound, cpu_T[1]);
3822                    tcg_temp_free(bound);
3823                }
3824                tcg_gen_movi_tl(cpu_A0, -1);
3825                tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]);
3826                tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0);
3827                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
3828                gen_op_update1_cc();
3829                set_cc_op(s, CC_OP_BMILGB + ot);
3830                break;
3831
3832            case 0x3f6: /* mulx By, Gy, rdx, Ey */
3833                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3834                    || !(s->prefix & PREFIX_VEX)
3835                    || s->vex_l != 0) {
3836                    goto illegal_op;
3837                }
3838                ot = mo_64_32(s->dflag);
3839                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
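                /* Unsigned rDX * Ey: the high half of the product goes to
                   Gy and the low half to By, with no flags modified.  */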
3840                switch (ot) {
3841                default:
3842                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
3843                    tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
3844                    tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
3845                                      cpu_tmp2_i32, cpu_tmp3_i32);
3846                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
3847                    tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
3848                    break;
3849#ifdef TARGET_X86_64
3850                case MO_64:
3851                    tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
3852                                      cpu_T[0], cpu_regs[R_EDX]);
3853                    break;
3854#endif
3855                }
3856                break;
3857
3858            case 0x3f5: /* pdep Gy, By, Ey */
3859                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3860                    || !(s->prefix & PREFIX_VEX)
3861                    || s->vex_l != 0) {
3862                    goto illegal_op;
3863                }
3864                ot = mo_64_32(s->dflag);
3865                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3866                /* Per the SDM, By is the deposit source and Ey the mask.
3867                   Zero-extend both for 32-bit ops; bounding the mask also
                       bounds (zero-extends) the result.  */
3868                if (ot == MO_64) {
3869                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
3870                } else {
3871                    tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                        tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
3872                }
3873                gen_helper_pdep(cpu_regs[reg], cpu_T[1], cpu_T[0]);
3874                break;
3875
3876            case 0x2f5: /* pext Gy, By, Ey */
3877                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3878                    || !(s->prefix & PREFIX_VEX)
3879                    || s->vex_l != 0) {
3880                    goto illegal_op;
3881                }
3882                ot = mo_64_32(s->dflag);
3883                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3884                /* Per the SDM, By is the extract source and Ey the mask.
3885                   Zero-extend both for 32-bit ops; bounding the mask also
                       bounds (zero-extends) the result.  */
3886                if (ot == MO_64) {
3887                    tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]);
3888                } else {
3889                    tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                        tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
3890                }
3891                gen_helper_pext(cpu_regs[reg], cpu_T[1], cpu_T[0]);
3892                break;
3893
3894            case 0x1f6: /* adcx Gy, Ey */
3895            case 0x2f6: /* adox Gy, Ey */
3896                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3897                    goto illegal_op;
3898                } else {
3899                    TCGv carry_in, carry_out, zero;
3900                    int end_op;
3901
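                    /* adcx adds with carry through CF only and adox through
                       OF only; the CC_OP_ADCX/ADOX/ADCOX states let
                       back-to-back insns chain carries without
                       materializing EFLAGS.  */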
3902                    ot = mo_64_32(s->dflag);
3903                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3904
3905                    /* Re-use the carry-out from a previous round.  */
3906                    TCGV_UNUSED(carry_in);
3907                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
3908                    switch (s->cc_op) {
3909                    case CC_OP_ADCX:
3910                        if (b == 0x1f6) {
3911                            carry_in = cpu_cc_dst;
3912                            end_op = CC_OP_ADCX;
3913                        } else {
3914                            end_op = CC_OP_ADCOX;
3915                        }
3916                        break;
3917                    case CC_OP_ADOX:
3918                        if (b == 0x1f6) {
3919                            end_op = CC_OP_ADCOX;
3920                        } else {
3921                            carry_in = cpu_cc_src2;
3922                            end_op = CC_OP_ADOX;
3923                        }
3924                        break;
3925                    case CC_OP_ADCOX:
3926                        end_op = CC_OP_ADCOX;
3927                        carry_in = carry_out;
3928                        break;
3929                    default:
3930                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
3931                        break;
3932                    }
3933                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
3934                    if (TCGV_IS_UNUSED(carry_in)) {
3935                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
3936                            gen_compute_eflags(s);
3937                        }
3938                        carry_in = cpu_tmp0;
3939                        tcg_gen_shri_tl(carry_in, cpu_cc_src,
3940                                        ctz32(b == 0x1f6 ? CC_C : CC_O));
3941                        tcg_gen_andi_tl(carry_in, carry_in, 1);
3942                    }
3943
3944                    switch (ot) {
3945#ifdef TARGET_X86_64
3946                    case MO_32:
3947                        /* If we know TL is 64-bit, and we want a 32-bit
3948                           result, just do everything in 64-bit arithmetic.  */
3949                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
3950                        tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
3951                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
3952                        tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
3953                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
3954                        tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
3955                        break;
3956#endif
3957                    default:
3958                        /* Otherwise compute the carry-out in two steps.  */
3959                        zero = tcg_const_tl(0);
3960                        tcg_gen_add2_tl(cpu_T[0], carry_out,
3961                                        cpu_T[0], zero,
3962                                        carry_in, zero);
3963                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
3964                                        cpu_regs[reg], carry_out,
3965                                        cpu_T[0], zero);
3966                        tcg_temp_free(zero);
3967                        break;
3968                    }
3969                    set_cc_op(s, end_op);
3970                }
3971                break;
3972
3973            case 0x1f7: /* shlx Gy, Ey, By */
3974            case 0x2f7: /* sarx Gy, Ey, By */
3975            case 0x3f7: /* shrx Gy, Ey, By */
3976                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3977                    || !(s->prefix & PREFIX_VEX)
3978                    || s->vex_l != 0) {
3979                    goto illegal_op;
3980                }
3981                ot = mo_64_32(s->dflag);
3982                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
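                /* The count from By is masked as for ordinary shifts; for
                   the 32-bit sarx/shrx forms the value is first sign- or
                   zero-extended so the full-width TCG shift is correct.  */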
3983                if (ot == MO_64) {
3984                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
3985                } else {
3986                    tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31);
3987                }
3988                if (b == 0x1f7) {
3989                    tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
3990                } else if (b == 0x2f7) {
3991                    if (ot != MO_64) {
3992                        tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
3993                    }
3994                    tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
3995                } else {
3996                    if (ot != MO_64) {
3997                        tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
3998                    }
3999                    tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4000                }
4001                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
4002                break;
4003
4004            case 0x0f3:
4005            case 0x1f3:
4006            case 0x2f3:
4007            case 0x3f3: /* Group 17 */
4008                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4009                    || !(s->prefix & PREFIX_VEX)
4010                    || s->vex_l != 0) {
4011                    goto illegal_op;
4012                }
4013                ot = mo_64_32(s->dflag);
4014                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4015
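                /* Group 17: identities on the lowest set bit of Ey.
                   blsr clears it (x & (x - 1)), blsmsk masks up to and
                   including it (x ^ (x - 1)), blsi isolates it (x & -x).
                   All three write the result to By.  */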
4016                switch (reg & 7) {
4017                case 1: /* blsr By,Ey: dest = src & (src - 1) */
4018                    tcg_gen_subi_tl(cpu_T[1], cpu_T[0], 1);
4019                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4020                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
4021                    gen_op_update2_cc();
4022                    set_cc_op(s, CC_OP_BMILGB + ot);
4023                    break;
4024
4025                case 2: /* blsmsk By,Ey: dest = src ^ (src - 1) */
4026                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4027                    tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1);
4028                    tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
                        gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
4029                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4030                    set_cc_op(s, CC_OP_BMILGB + ot);
4031                    break;
4032
4033                case 3: /* blsi By, Ey: dest = src & -src */
4034                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4035                    tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
4036                    tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
                        gen_op_mov_reg_v(ot, s->vex_v, cpu_T[0]);
4037                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4038                    set_cc_op(s, CC_OP_BMILGB + ot);
4039                    break;
4040
4041                default:
4042                    goto illegal_op;
4043                }
4044                break;
4045
4046            default:
4047                goto illegal_op;
4048            }
4049            break;
4050
4051        case 0x03a:
4052        case 0x13a:
4053            b = modrm;
4054            modrm = cpu_ldub_code(env, s->pc++);
4055            rm = modrm & 7;
4056            reg = ((modrm >> 3) & 7) | rex_r;
4057            mod = (modrm >> 6) & 3;
4058            if (b1 >= 2) {
4059                goto illegal_op;
4060            }
4061
4062            sse_fn_eppi = sse_op_table7[b].op[b1];
4063            if (!sse_fn_eppi) {
4064                goto illegal_op;
4065            }
4066            if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4067                goto illegal_op;
4068
4069            if (sse_fn_eppi == SSE_SPECIAL) {
4070                ot = mo_64_32(s->dflag);
4071                rm = (modrm & 7) | REX_B(s);
4072                if (mod != 3)
4073                    gen_lea_modrm(env, s, modrm);
4074                reg = ((modrm >> 3) & 7) | rex_r;
4075                val = cpu_ldub_code(env, s->pc++);
4076                switch (b) {
4077                case 0x14: /* pextrb */
4078                    tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4079                                            xmm_regs[reg].XMM_B(val & 15)));
4080                    if (mod == 3) {
4081                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4082                    } else {
4083                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
4084                                           s->mem_index, MO_UB);
4085                    }
4086                    break;
4087                case 0x15: /* pextrw */
4088                    tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4089                                            xmm_regs[reg].XMM_W(val & 7)));
4090                    if (mod == 3) {
4091                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4092                    } else {
4093                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
4094                                           s->mem_index, MO_LEUW);
4095                    }
4096                    break;
4097                case 0x16:
4098                    if (ot == MO_32) { /* pextrd */
4099                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4100                                        offsetof(CPUX86State,
4101                                                xmm_regs[reg].XMM_L(val & 3)));
4102                        if (mod == 3) {
4103                            tcg_gen_extu_i32_tl(cpu_regs[rm], cpu_tmp2_i32);
4104                        } else {
4105                            tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
4106                                                s->mem_index, MO_LEUL);
4107                        }
4108                    } else { /* pextrq */
4109#ifdef TARGET_X86_64
4110                        tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
4111                                        offsetof(CPUX86State,
4112                                                xmm_regs[reg].XMM_Q(val & 1)));
4113                        if (mod == 3) {
4114                            tcg_gen_mov_i64(cpu_regs[rm], cpu_tmp1_i64);
4115                        } else {
4116                            tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
4117                                                s->mem_index, MO_LEQ);
4118                        }
4119#else
4120                        goto illegal_op;
4121#endif
4122                    }
4123                    break;
4124                case 0x17: /* extractps */
4125                    tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4126                                            xmm_regs[reg].XMM_L(val & 3)));
4127                    if (mod == 3) {
4128                        gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4129                    } else {
4130                        tcg_gen_qemu_st_tl(cpu_T[0], cpu_A0,
4131                                           s->mem_index, MO_LEUL);
4132                    }
4133                    break;
4134                case 0x20: /* pinsrb */
4135                    if (mod == 3) {
4136                        gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
4137                    } else {
4138                        tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
4139                                           s->mem_index, MO_UB);
4140                    }
4141                    tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
4142                                            xmm_regs[reg].XMM_B(val & 15)));
4143                    break;
4144                case 0x21: /* insertps */
4145                    if (mod == 3) {
4146                        tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
4147                                        offsetof(CPUX86State,xmm_regs[rm]
4148                                                .XMM_L((val >> 6) & 3)));
4149                    } else {
4150                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4151                                            s->mem_index, MO_LEUL);
4152                    }
4153                    tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4154                                    offsetof(CPUX86State,xmm_regs[reg]
4155                                            .XMM_L((val >> 4) & 3)));
4156                    if ((val >> 0) & 1)
4157                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4158                                        cpu_env, offsetof(CPUX86State,
4159                                                xmm_regs[reg].XMM_L(0)));
4160                    if ((val >> 1) & 1)
4161                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4162                                        cpu_env, offsetof(CPUX86State,
4163                                                xmm_regs[reg].XMM_L(1)));
4164                    if ((val >> 2) & 1)
4165                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4166                                        cpu_env, offsetof(CPUX86State,
4167                                                xmm_regs[reg].XMM_L(2)));
4168                    if ((val >> 3) & 1)
4169                        tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4170                                        cpu_env, offsetof(CPUX86State,
4171                                                xmm_regs[reg].XMM_L(3)));
4172                    break;
4173                case 0x22:
4174                    if (ot == MO_32) { /* pinsrd */
4175                        if (mod == 3) {
4176                            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[rm]);
4177                        } else {
4178                            tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
4179                                                s->mem_index, MO_LEUL);
4180                        }
4181                        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
4182                                        offsetof(CPUX86State,
4183                                                xmm_regs[reg].XMM_L(val & 3)));
4184                    } else { /* pinsrq */
4185#ifdef TARGET_X86_64
4186                        if (mod == 3) {
4187                            gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
4188                        } else {
4189                            tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
4190                                                s->mem_index, MO_LEQ);
4191                        }
4192                        tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
4193                                        offsetof(CPUX86State,
4194                                                xmm_regs[reg].XMM_Q(val & 1)));
4195#else
4196                        goto illegal_op;
4197#endif
4198                    }
4199                    break;
4200                }
4201                return;
4202            }
4203
4204            if (b1) {
4205                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4206                if (mod == 3) {
4207                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4208                } else {
4209                    op2_offset = offsetof(CPUX86State,xmm_t0);
4210                    gen_lea_modrm(env, s, modrm);
4211                    gen_ldo_env_A0(s, op2_offset);
4212                }
4213            } else {
4214                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4215                if (mod == 3) {
4216                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4217                } else {
4218                    op2_offset = offsetof(CPUX86State,mmx_t0);
4219                    gen_lea_modrm(env, s, modrm);
4220                    gen_ldq_env_A0(s, op2_offset);
4221                }
4222            }
4223            val = cpu_ldub_code(env, s->pc++);
4224
4225            if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4226                set_cc_op(s, CC_OP_EFLAGS);
4227
4228                if (s->dflag == MO_64) {
4229                    /* The helper must use entire 64-bit gp registers */
4230                    val |= 1 << 8;
4231                }
4232            }
4233
4234            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4235            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4236            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4237            break;
4238
4239        case 0x33a:
4240            /* Various integer extensions at 0f 3a f[0-f].  */
4241            b = modrm | (b1 << 8);
4242            modrm = cpu_ldub_code(env, s->pc++);
4243            reg = ((modrm >> 3) & 7) | rex_r;
4244
4245            switch (b) {
4246            case 0x3f0: /* rorx Gy,Ey, Ib */
4247                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4248                    || !(s->prefix & PREFIX_VEX)
4249                    || s->vex_l != 0) {
4250                    goto illegal_op;
4251                }
4252                ot = mo_64_32(s->dflag);
4253                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4254                b = cpu_ldub_code(env, s->pc++);
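                /* rorx rotates right by an immediate without touching the
                   flags; the count is masked to the operand size.  */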
4255                if (ot == MO_64) {
4256                    tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
4257                } else {
4258                    tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4259                    tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31);
4260                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
4261                }
4262                gen_op_mov_reg_v(ot, reg, cpu_T[0]);
4263                break;
4264
4265            default:
4266                goto illegal_op;
4267            }
4268            break;
4269
4270        default:
4271            goto illegal_op;
4272        }
4273    } else {
4274        /* generic MMX or SSE operation */
4275        switch(b) {
4276        case 0x70: /* pshufx insn */
4277        case 0xc6: /* shufps/shufpd */
4278        case 0xc2: /* compare insns */
4279            s->rip_offset = 1;
4280            break;
4281        default:
4282            break;
4283        }
4284        if (is_xmm) {
4285            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4286            if (mod != 3) {
4287                int sz = 4;
4288
4289                gen_lea_modrm(env, s, modrm);
4290                op2_offset = offsetof(CPUX86State,xmm_t0);
4291
4292                switch (b) {
4293                case 0x50 ... 0x5a:
4294                case 0x5c ... 0x5f:
4295                case 0xc2:
4296                    /* Most sse scalar operations.  */
4297                    if (b1 == 2) {
4298                        sz = 2;
4299                    } else if (b1 == 3) {
4300                        sz = 3;
4301                    }
4302                    break;
4303
4304                case 0x2e:  /* ucomis[sd] */
4305                case 0x2f:  /* comis[sd] */
4306                    if (b1 == 0) {
4307                        sz = 2;
4308                    } else {
4309                        sz = 3;
4310                    }
4311                    break;
4312                }
4313
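                /* sz selects how much of the memory operand is loaded into
                   xmm_t0: 2 = 32 bits (ss forms), 3 = 64 bits (sd forms),
                   otherwise the full 128 bits.  */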
4314                switch (sz) {
4315                case 2:
4316                    /* 32 bit access */
4317                    gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
4318                    tcg_gen_st32_tl(cpu_T[0], cpu_env,
4319                                    offsetof(CPUX86State,xmm_t0.XMM_L(0)));
4320                    break;
4321                case 3:
4322                    /* 64 bit access */
4323                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_D(0)));
4324                    break;
4325                default:
4326                    /* 128 bit access */
4327                    gen_ldo_env_A0(s, op2_offset);
4328                    break;
4329                }
4330            } else {
4331                rm = (modrm & 7) | REX_B(s);
4332                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4333            }
4334        } else {
4335            op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4336            if (mod != 3) {
4337                gen_lea_modrm(env, s, modrm);
4338                op2_offset = offsetof(CPUX86State,mmx_t0);
4339                gen_ldq_env_A0(s, op2_offset);
4340            } else {
4341                rm = (modrm & 7);
4342                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4343            }
4344        }
4345        switch(b) {
4346        case 0x0f: /* 3DNow! data insns */
4347            if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
4348                goto illegal_op;
4349            val = cpu_ldub_code(env, s->pc++);
4350            sse_fn_epp = sse_op_table5[val];
4351            if (!sse_fn_epp) {
4352                goto illegal_op;
4353            }
4354            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4355            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4356            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4357            break;
4358        case 0x70: /* pshufx insn */
4359        case 0xc6: /* shufps/shufpd */
4360            val = cpu_ldub_code(env, s->pc++);
4361            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4362            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4363            /* XXX: introduce a new table? */
4364            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4365            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
4366            break;
4367        case 0xc2:
4368            /* compare insns */
4369            val = cpu_ldub_code(env, s->pc++);
4370            if (val >= 8)
4371                goto illegal_op;
4372            sse_fn_epp = sse_op_table4[val][b1];
4373
4374            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4375            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4376            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4377            break;
4378        case 0xf7:
4379            /* maskmovq/maskmovdqu: prepare A0 from the implicit DS:rDI */
4380            if (mod != 3)
4381                goto illegal_op;
4382            tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EDI]);
4383            gen_extu(s->aflag, cpu_A0);
4384            gen_add_A0_ds_seg(s);
4385
4386            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4387            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4388            /* XXX: introduce a new table? */
4389            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4390            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
4391            break;
4392        default:
4393            tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
4394            tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
4395            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
4396            break;
4397        }
4398        if (b == 0x2e || b == 0x2f) {
4399            set_cc_op(s, CC_OP_EFLAGS);
4400        }
4401    }
4402}
4403
4404/* convert one instruction. s->is_jmp is set if the translation must
4405   be stopped. Return the next pc value */
4406static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
4407                               target_ulong pc_start)
4408{
4409    int b, prefixes;
4410    int shift;
4411    TCGMemOp ot, aflag, dflag;
4412    int modrm, reg, rm, mod, op, opreg, val;
4413    target_ulong next_eip, tval;
4414    int rex_w, rex_r;
4415
4416    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
4417        tcg_gen_debug_insn_start(pc_start);
4418    }
4419    s->pc = pc_start;
4420    prefixes = 0;
4421    s->override = -1;
4422    rex_w = -1;
4423    rex_r = 0;
4424#ifdef TARGET_X86_64
4425    s->rex_x = 0;
4426    s->rex_b = 0;
4427    x86_64_hregs = 0;
4428#endif
4429    s->rip_offset = 0; /* for relative ip address */
4430    s->vex_l = 0;
4431    s->vex_v = 0;
4432 next_byte:
4433    b = cpu_ldub_code(env, s->pc);
4434    s->pc++;
4435    /* Collect prefixes.  */
4436    switch (b) {
4437    case 0xf3:
4438        prefixes |= PREFIX_REPZ;
4439        goto next_byte;
4440    case 0xf2:
4441        prefixes |= PREFIX_REPNZ;
4442        goto next_byte;
4443    case 0xf0:
4444        prefixes |= PREFIX_LOCK;
4445        goto next_byte;
4446    case 0x2e:
4447        s->override = R_CS;
4448        goto next_byte;
4449    case 0x36:
4450        s->override = R_SS;
4451        goto next_byte;
4452    case 0x3e:
4453        s->override = R_DS;
4454        goto next_byte;
4455    case 0x26:
4456        s->override = R_ES;
4457        goto next_byte;
4458    case 0x64:
4459        s->override = R_FS;
4460        goto next_byte;
4461    case 0x65:
4462        s->override = R_GS;
4463        goto next_byte;
4464    case 0x66:
4465        prefixes |= PREFIX_DATA;
4466        goto next_byte;
4467    case 0x67:
4468        prefixes |= PREFIX_ADR;
4469        goto next_byte;
4470#ifdef TARGET_X86_64
4471    case 0x40 ... 0x4f:
4472        if (CODE64(s)) {
4473            /* REX prefix */
4474            rex_w = (b >> 3) & 1;
4475            rex_r = (b & 0x4) << 1;
4476            s->rex_x = (b & 0x2) << 2;
4477            REX_B(s) = (b & 0x1) << 3;
4478            x86_64_hregs = 1; /* select uniform byte register addressing */
4479            goto next_byte;
4480        }
4481        break;
4482#endif
4483    case 0xc5: /* 2-byte VEX */
4484    case 0xc4: /* 3-byte VEX */
4485        /* c4/c5 only act as VEX prefixes for 32/64-bit code outside
4486           vm86 mode; otherwise they are the LES and LDS instructions.  */
4487        if (s->code32 && !s->vm86) {
4488            static const int pp_prefix[4] = {
4489                0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4490            };
4491            int vex3, vex2 = cpu_ldub_code(env, s->pc);
4492
4493            if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4494                /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4495                   otherwise the instruction is LES or LDS.  */
4496                break;
4497            }
4498            s->pc++;
4499
4500            /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4501            if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4502                            | PREFIX_LOCK | PREFIX_DATA)) {
4503                goto illegal_op;
4504            }
4505#ifdef TARGET_X86_64
4506            if (x86_64_hregs) {
4507                goto illegal_op;
4508            }
4509#endif
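            /* Byte 1 of the VEX prefix: bit 7 is ~R.  For c4, bits 6:5 are
               ~X/~B and bits 4:0 (mmmmm) select the implied leading opcode
               bytes, with W, ~vvvv, L and pp in a final byte; for c5 the
               first byte itself carries ~vvvv, L and pp (vex3 aliases
               vex2).  */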
4510            rex_r = (~vex2 >> 4) & 8;
4511            if (b == 0xc5) {
4512                vex3 = vex2;
4513                b = cpu_ldub_code(env, s->pc++);
4514            } else {
4515#ifdef TARGET_X86_64
4516                s->rex_x = (~vex2 >> 3) & 8;
4517                s->rex_b = (~vex2 >> 2) & 8;
4518#endif
4519                vex3 = cpu_ldub_code(env, s->pc++);
4520                rex_w = (vex3 >> 7) & 1;
4521                switch (vex2 & 0x1f) {
4522                case 0x01: /* Implied 0f leading opcode bytes.  */
4523                    b = cpu_ldub_code(env, s->pc++) | 0x100;
4524                    break;
4525                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4526                    b = 0x138;
4527                    break;
4528                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4529                    b = 0x13a;
4530                    break;
4531                default:   /* Reserved for future use.  */
4532                    goto illegal_op;
4533                }
4534            }
4535            s->vex_v = (~vex3 >> 3) & 0xf;
4536            s->vex_l = (vex3 >> 2) & 1;
4537            prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4538        }
4539        break;
4540    }
4541
4542    /* Post-process prefixes.  */
4543    if (CODE64(s)) {
4544        /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4545           data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4546           over 0x66 if both are present.  */
4547        dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4548        /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4549        aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4550    } else {
4551        /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4552        if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4553            dflag = MO_32;
4554        } else {
4555            dflag = MO_16;
4556        }
4557        /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4558        if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4559            aflag = MO_32;
4560        } else {
4561            aflag = MO_16;
4562        }
4563    }
4564
4565    s->prefix = prefixes;
4566    s->aflag = aflag;
4567    s->dflag = dflag;
4568
4569    /* lock generation */
4570    if (prefixes & PREFIX_LOCK)
4571        gen_helper_lock();
4572
4573    /* now check op code */
4574 reswitch:
4575    switch(b) {
4576    case 0x0f:
4577        /**************************/
4578        /* extended op code */
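        /* Tag two-byte opcodes with 0x100 so a single switch covers both
           opcode maps.  */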
4579        b = cpu_ldub_code(env, s->pc++) | 0x100;
4580        goto reswitch;
4581
4582        /**************************/
4583        /* arith & logic */
4584    case 0x00 ... 0x05:
4585    case 0x08 ... 0x0d:
4586    case 0x10 ... 0x15:
4587    case 0x18 ... 0x1d:
4588    case 0x20 ... 0x25:
4589    case 0x28 ... 0x2d:
4590    case 0x30 ... 0x35:
4591    case 0x38 ... 0x3d:
4592        {
4593            int op, f, val;
4594            op = (b >> 3) & 7;
4595            f = (b >> 1) & 3;
4596
4597            ot = mo_b_d(b, dflag);
4598
4599            switch(f) {
4600            case 0: /* OP Ev, Gv */
4601                modrm = cpu_ldub_code(env, s->pc++);
4602                reg = ((modrm >> 3) & 7) | rex_r;
4603                mod = (modrm >> 6) & 3;
4604                rm = (modrm & 7) | REX_B(s);
4605                if (mod != 3) {
4606                    gen_lea_modrm(env, s, modrm);
4607                    opreg = OR_TMP0;
4608                } else if (op == OP_XORL && rm == reg) {
4609                xor_zero:
4610                    /* xor reg, reg optimisation */
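                    /* CC_OP_CLR lazily encodes the flags of a zero result:
                       ZF and PF set, CF/OF/SF/AF clear.  */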
4611                    set_cc_op(s, CC_OP_CLR);
4612                    tcg_gen_movi_tl(cpu_T[0], 0);
4613                    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
4614                    break;
4615                } else {
4616                    opreg = rm;
4617                }
4618                gen_op_mov_v_reg(ot, cpu_T[1], reg);
4619                gen_op(s, op, ot, opreg);
4620                break;
4621            case 1: /* OP Gv, Ev */
4622                modrm = cpu_ldub_code(env, s->pc++);
4623                mod = (modrm >> 6) & 3;
4624                reg = ((modrm >> 3) & 7) | rex_r;
4625                rm = (modrm & 7) | REX_B(s);
4626                if (mod != 3) {
4627                    gen_lea_modrm(env, s, modrm);
4628                    gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
4629                } else if (op == OP_XORL && rm == reg) {
4630                    goto xor_zero;
4631                } else {
4632                    gen_op_mov_v_reg(ot, cpu_T[1], rm);
4633                }
4634                gen_op(s, op, ot, reg);
4635                break;
4636            case 2: /* OP A, Iv */
4637                val = insn_get(env, s, ot);
4638                tcg_gen_movi_tl(cpu_T[1], val);
4639                gen_op(s, op, ot, OR_EAX);
4640                break;
4641            }
4642        }
4643        break;
4644
4645    case 0x82:
4646        if (CODE64(s))
4647            goto illegal_op;
4648    case 0x80: /* GRP1 */
4649    case 0x81:
4650    case 0x83:
4651        {
4652            int val;
4653
4654            ot = mo_b_d(b, dflag);
4655
4656            modrm = cpu_ldub_code(env, s->pc++);
4657            mod = (modrm >> 6) & 3;
4658            rm = (modrm & 7) | REX_B(s);
4659            op = (modrm >> 3) & 7;
4660
4661            if (mod != 3) {
4662                if (b == 0x83)
4663                    s->rip_offset = 1;
4664                else
4665                    s->rip_offset = insn_const_size(ot);
4666                gen_lea_modrm(env, s, modrm);
4667                opreg = OR_TMP0;
4668            } else {
4669                opreg = rm;
4670            }
4671
4672            switch(b) {
4673            default:
4674            case 0x80:
4675            case 0x81:
4676            case 0x82:
4677                val = insn_get(env, s, ot);
4678                break;
4679            case 0x83:
4680                val = (int8_t)insn_get(env, s, MO_8);
4681                break;
4682            }
4683            tcg_gen_movi_tl(cpu_T[1], val);
4684            gen_op(s, op, ot, opreg);
4685        }
4686        break;
4687
4688        /**************************/
4689        /* inc, dec, and other misc arith */
4690    case 0x40 ... 0x47: /* inc Gv */
4691        ot = dflag;
4692        gen_inc(s, ot, OR_EAX + (b & 7), 1);
4693        break;
4694    case 0x48 ... 0x4f: /* dec Gv */
4695        ot = dflag;
4696        gen_inc(s, ot, OR_EAX + (b & 7), -1);
4697        break;
4698    case 0xf6: /* GRP3 */
4699    case 0xf7:
4700        ot = mo_b_d(b, dflag);
4701
4702        modrm = cpu_ldub_code(env, s->pc++);
4703        mod = (modrm >> 6) & 3;
4704        rm = (modrm & 7) | REX_B(s);
4705        op = (modrm >> 3) & 7;
4706        if (mod != 3) {
4707            if (op == 0)
4708                s->rip_offset = insn_const_size(ot);
4709            gen_lea_modrm(env, s, modrm);
4710            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
4711        } else {
4712            gen_op_mov_v_reg(ot, cpu_T[0], rm);
4713        }
4714
4715        switch(op) {
4716        case 0: /* test */
4717            val = insn_get(env, s, ot);
4718            tcg_gen_movi_tl(cpu_T[1], val);
4719            gen_op_testl_T0_T1_cc();
4720            set_cc_op(s, CC_OP_LOGICB + ot);
4721            break;
4722        case 2: /* not */
4723            tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
4724            if (mod != 3) {
4725                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
4726            } else {
4727                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4728            }
4729            break;
4730        case 3: /* neg */
4731            tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
4732            if (mod != 3) {
4733                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
4734            } else {
4735                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
4736            }
4737            gen_op_update_neg_cc();
4738            set_cc_op(s, CC_OP_SUBB + ot);
4739            break;
4740        case 4: /* mul */
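            /* MUL: unsigned multiply of rAX by T0.  The double-width
               product lands in rDX:rAX (AX for byte ops), and cc_src
               holds the high half, so CF=OF=1 iff that half is non-zero. */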
4741            switch(ot) {
4742            case MO_8:
4743                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
4744                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
4745                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
4746                /* XXX: use 32 bit mul which could be faster */
4747                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4748                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4749                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4750                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
4751                set_cc_op(s, CC_OP_MULB);
4752                break;
4753            case MO_16:
4754                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
4755                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4756                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
4757                /* XXX: use 32 bit mul which could be faster */
4758                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4759                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4760                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4761                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
4762                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
4763                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
4764                set_cc_op(s, CC_OP_MULW);
4765                break;
4766            default:
4767            case MO_32:
4768                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4769                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4770                tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4771                                  cpu_tmp2_i32, cpu_tmp3_i32);
4772                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4773                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4774                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4775                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4776                set_cc_op(s, CC_OP_MULL);
4777                break;
4778#ifdef TARGET_X86_64
4779            case MO_64:
4780                tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4781                                  cpu_T[0], cpu_regs[R_EAX]);
4782                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4783                tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4784                set_cc_op(s, CC_OP_MULQ);
4785                break;
4786#endif
4787            }
4788            break;
4789        case 5: /* imul */
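            /* IMUL (one operand): signed multiply.  cc_src is made
               non-zero iff the double-width product differs from the
               sign extension of its low half, i.e. CF=OF=1 when the
               result does not fit in the low half.  */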
4790            switch(ot) {
4791            case MO_8:
4792                gen_op_mov_v_reg(MO_8, cpu_T[1], R_EAX);
4793                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
4794                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
4795                /* XXX: use 32 bit mul which could be faster */
4796                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4797                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4798                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4799                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
4800                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4801                set_cc_op(s, CC_OP_MULB);
4802                break;
4803            case MO_16:
4804                gen_op_mov_v_reg(MO_16, cpu_T[1], R_EAX);
4805                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
4806                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
4807                /* XXX: use 32 bit mul which could be faster */
4808                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
4809                gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
4810                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
4811                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
4812                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
4813                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
4814                gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
4815                set_cc_op(s, CC_OP_MULW);
4816                break;
4817            default:
4818            case MO_32:
4819                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4820                tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
4821                tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
4822                                  cpu_tmp2_i32, cpu_tmp3_i32);
4823                tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
4824                tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
4825                tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
4826                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4827                tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
4828                tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
4829                set_cc_op(s, CC_OP_MULL);
4830                break;
4831#ifdef TARGET_X86_64
4832            case MO_64:
4833                tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4834                                  cpu_T[0], cpu_regs[R_EAX]);
4835                tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4836                tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4837                tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4838                set_cc_op(s, CC_OP_MULQ);
4839                break;
4840#endif
4841            }
4842            break;
4843        case 6: /* div */
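            /* The division helpers raise #DE on divide-by-zero or
               quotient overflow, so EIP must point at this insn
               before each call.  */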
4844            switch(ot) {
4845            case MO_8:
4846                gen_jmp_im(pc_start - s->cs_base);
4847                gen_helper_divb_AL(cpu_env, cpu_T[0]);
4848                break;
4849            case MO_16:
4850                gen_jmp_im(pc_start - s->cs_base);
4851                gen_helper_divw_AX(cpu_env, cpu_T[0]);
4852                break;
4853            default:
4854            case MO_32:
4855                gen_jmp_im(pc_start - s->cs_base);
4856                gen_helper_divl_EAX(cpu_env, cpu_T[0]);
4857                break;
4858#ifdef TARGET_X86_64
4859            case MO_64:
4860                gen_jmp_im(pc_start - s->cs_base);
4861                gen_helper_divq_EAX(cpu_env, cpu_T[0]);
4862                break;
4863#endif
4864            }
4865            break;
4866        case 7: /* idiv */
4867            switch(ot) {
4868            case MO_8:
4869                gen_jmp_im(pc_start - s->cs_base);
4870                gen_helper_idivb_AL(cpu_env, cpu_T[0]);
4871                break;
4872            case MO_16:
4873                gen_jmp_im(pc_start - s->cs_base);
4874                gen_helper_idivw_AX(cpu_env, cpu_T[0]);
4875                break;
4876            default:
4877            case MO_32:
4878                gen_jmp_im(pc_start - s->cs_base);
4879                gen_helper_idivl_EAX(cpu_env, cpu_T[0]);
4880                break;
4881#ifdef TARGET_X86_64
4882            case MO_64:
4883                gen_jmp_im(pc_start - s->cs_base);
4884                gen_helper_idivq_EAX(cpu_env, cpu_T[0]);
4885                break;
4886#endif
4887            }
4888            break;
4889        default:
4890            goto illegal_op;
4891        }
4892        break;
4893
4894    case 0xfe: /* GRP4 */
4895    case 0xff: /* GRP5 */
4896        ot = mo_b_d(b, dflag);
4897
4898        modrm = cpu_ldub_code(env, s->pc++);
4899        mod = (modrm >> 6) & 3;
4900        rm = (modrm & 7) | REX_B(s);
4901        op = (modrm >> 3) & 7;
4902        if (op >= 2 && b == 0xfe) {
4903            goto illegal_op;
4904        }
4905        if (CODE64(s)) {
4906            if (op == 2 || op == 4) {
4907                /* operand size for jumps is 64 bit */
4908                ot = MO_64;
4909            } else if (op == 3 || op == 5) {
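                /* lcall/ljmp offset is 32-bit by default, 64-bit with
                   rex_w, or 16-bit with a 0x66 prefix.  */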
4910                ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
4911            } else if (op == 6) {
4912                /* default push size is 64 bit */
4913                ot = mo_pushpop(s, dflag);
4914            }
4915        }
4916        if (mod != 3) {
4917            gen_lea_modrm(env, s, modrm);
4918            if (op >= 2 && op != 3 && op != 5)
4919                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
4920        } else {
4921            gen_op_mov_v_reg(ot, cpu_T[0], rm);
4922        }
4923
4924        switch(op) {
4925        case 0: /* inc Ev */
4926            if (mod != 3)
4927                opreg = OR_TMP0;
4928            else
4929                opreg = rm;
4930            gen_inc(s, ot, opreg, 1);
4931            break;
4932        case 1: /* dec Ev */
4933            if (mod != 3)
4934                opreg = OR_TMP0;
4935            else
4936                opreg = rm;
4937            gen_inc(s, ot, opreg, -1);
4938            break;
4939        case 2: /* call Ev */
4940            /* XXX: optimize if memory (no 'and' is necessary) */
4941            if (dflag == MO_16) {
4942                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4943            }
4944            next_eip = s->pc - s->cs_base;
4945            tcg_gen_movi_tl(cpu_T[1], next_eip);
4946            gen_push_v(s, cpu_T[1]);
4947            gen_op_jmp_v(cpu_T[0]);
4948            gen_eob(s);
4949            break;
4950        case 3: /* lcall Ev */
4951            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
4952            gen_add_A0_im(s, 1 << ot);
4953            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
4954        do_lcall:
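            /* The helpers take the operand size as dflag - 1
               (0 = 16-bit, 1 = 32-bit, 2 = 64-bit).  */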
4955            if (s->pe && !s->vm86) {
4956                gen_update_cc_op(s);
4957                gen_jmp_im(pc_start - s->cs_base);
4958                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4959                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
4960                                           tcg_const_i32(dflag - 1),
4961                                           tcg_const_i32(s->pc - pc_start));
4962            } else {
4963                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4964                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T[1],
4965                                      tcg_const_i32(dflag - 1),
4966                                      tcg_const_i32(s->pc - s->cs_base));
4967            }
4968            gen_eob(s);
4969            break;
4970        case 4: /* jmp Ev */
4971            if (dflag == MO_16) {
4972                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
4973            }
4974            gen_op_jmp_v(cpu_T[0]);
4975            gen_eob(s);
4976            break;
4977        case 5: /* ljmp Ev */
4978            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
4979            gen_add_A0_im(s, 1 << ot);
4980            gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
4981        do_ljmp:
4982            if (s->pe && !s->vm86) {
4983                gen_update_cc_op(s);
4984                gen_jmp_im(pc_start - s->cs_base);
4985                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
4986                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
4987                                          tcg_const_i32(s->pc - pc_start));
4988            } else {
4989                gen_op_movl_seg_T0_vm(R_CS);
4990                gen_op_jmp_v(cpu_T[1]);
4991            }
4992            gen_eob(s);
4993            break;
4994        case 6: /* push Ev */
4995            gen_push_v(s, cpu_T[0]);
4996            break;
4997        default:
4998            goto illegal_op;
4999        }
5000        break;
5001
5002    case 0x84: /* test Ev, Gv */
5003    case 0x85:
5004        ot = mo_b_d(b, dflag);
5005
5006        modrm = cpu_ldub_code(env, s->pc++);
5007        reg = ((modrm >> 3) & 7) | rex_r;
5008
5009        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5010        gen_op_mov_v_reg(ot, cpu_T[1], reg);
5011        gen_op_testl_T0_T1_cc();
5012        set_cc_op(s, CC_OP_LOGICB + ot);
5013        break;
5014
5015    case 0xa8: /* test eAX, Iv */
5016    case 0xa9:
5017        ot = mo_b_d(b, dflag);
5018        val = insn_get(env, s, ot);
5019
5020        gen_op_mov_v_reg(ot, cpu_T[0], OR_EAX);
5021        tcg_gen_movi_tl(cpu_T[1], val);
5022        gen_op_testl_T0_T1_cc();
5023        set_cc_op(s, CC_OP_LOGICB + ot);
5024        break;
5025
5026    case 0x98: /* CWDE/CBW */
5027        switch (dflag) {
5028#ifdef TARGET_X86_64
5029        case MO_64:
5030            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
5031            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
5032            gen_op_mov_reg_v(MO_64, R_EAX, cpu_T[0]);
5033            break;
5034#endif
5035        case MO_32:
5036            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
5037            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5038            gen_op_mov_reg_v(MO_32, R_EAX, cpu_T[0]);
5039            break;
5040        case MO_16:
5041            gen_op_mov_v_reg(MO_8, cpu_T[0], R_EAX);
5042            tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
5043            gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
5044            break;
5045        default:
5046            tcg_abort();
5047        }
5048        break;
5049    case 0x99: /* CDQ/CWD */
5050        switch (dflag) {
5051#ifdef TARGET_X86_64
5052        case MO_64:
5053            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EAX);
5054            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 63);
5055            gen_op_mov_reg_v(MO_64, R_EDX, cpu_T[0]);
5056            break;
5057#endif
5058        case MO_32:
5059            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EAX);
5060            tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
5061            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 31);
5062            gen_op_mov_reg_v(MO_32, R_EDX, cpu_T[0]);
5063            break;
5064        case MO_16:
5065            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EAX);
5066            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5067            tcg_gen_sari_tl(cpu_T[0], cpu_T[0], 15);
5068            gen_op_mov_reg_v(MO_16, R_EDX, cpu_T[0]);
5069            break;
5070        default:
5071            tcg_abort();
5072        }
5073        break;
5074    case 0x1af: /* imul Gv, Ev */
5075    case 0x69: /* imul Gv, Ev, I */
5076    case 0x6b:
5077        ot = dflag;
5078        modrm = cpu_ldub_code(env, s->pc++);
5079        reg = ((modrm >> 3) & 7) | rex_r;
5080        if (b == 0x69)
5081            s->rip_offset = insn_const_size(ot);
5082        else if (b == 0x6b)
5083            s->rip_offset = 1;
5084        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5085        if (b == 0x69) {
5086            val = insn_get(env, s, ot);
5087            tcg_gen_movi_tl(cpu_T[1], val);
5088        } else if (b == 0x6b) {
5089            val = (int8_t)insn_get(env, s, MO_8);
5090            tcg_gen_movi_tl(cpu_T[1], val);
5091        } else {
5092            gen_op_mov_v_reg(ot, cpu_T[1], reg);
5093        }
5094        switch (ot) {
5095#ifdef TARGET_X86_64
5096        case MO_64:
5097            tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
5098            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5099            tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5100            tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
5101            break;
5102#endif
5103        case MO_32:
5104            tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
5105            tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
5106            tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
5107                              cpu_tmp2_i32, cpu_tmp3_i32);
5108            tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
5109            tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
5110            tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5111            tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
5112            tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
5113            break;
5114        default:
5115            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5116            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
5117            /* XXX: use 32 bit mul which could be faster */
5118            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
5119            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
5120            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
5121            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
5122            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
5123            break;
5124        }
5125        set_cc_op(s, CC_OP_MULB + ot);
5126        break;
5127    case 0x1c0:
5128    case 0x1c1: /* xadd Ev, Gv */
5129        ot = mo_b_d(b, dflag);
5130        modrm = cpu_ldub_code(env, s->pc++);
5131        reg = ((modrm >> 3) & 7) | rex_r;
5132        mod = (modrm >> 6) & 3;
5133        if (mod == 3) {
5134            rm = (modrm & 7) | REX_B(s);
5135            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5136            gen_op_mov_v_reg(ot, cpu_T[1], rm);
5137            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
5138            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5139            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
5140        } else {
5141            gen_lea_modrm(env, s, modrm);
5142            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5143            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
5144            tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
5145            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5146            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5147        }
5148        gen_op_update2_cc();
5149        set_cc_op(s, CC_OP_ADDB + ot);
5150        break;
5151    case 0x1b0:
5152    case 0x1b1: /* cmpxchg Ev, Gv */
5153        {
5154            int label1, label2;
5155            TCGv t0, t1, t2, a0;
5156
5157            ot = mo_b_d(b, dflag);
5158            modrm = cpu_ldub_code(env, s->pc++);
5159            reg = ((modrm >> 3) & 7) | rex_r;
5160            mod = (modrm >> 6) & 3;
5161            t0 = tcg_temp_local_new();
5162            t1 = tcg_temp_local_new();
5163            t2 = tcg_temp_local_new();
5164            a0 = tcg_temp_local_new();
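            /* local temps, because their values must survive the
               branches generated below */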
5165            gen_op_mov_v_reg(ot, t1, reg);
5166            if (mod == 3) {
5167                rm = (modrm & 7) | REX_B(s);
5168                gen_op_mov_v_reg(ot, t0, rm);
5169            } else {
5170                gen_lea_modrm(env, s, modrm);
5171                tcg_gen_mov_tl(a0, cpu_A0);
5172                gen_op_ld_v(s, ot, t0, a0);
5173                rm = 0; /* avoid warning */
5174            }
5175            label1 = gen_new_label();
5176            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
5177            gen_extu(ot, t0);
5178            gen_extu(ot, t2);
5179            tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
5180            label2 = gen_new_label();
5181            if (mod == 3) {
5182                gen_op_mov_reg_v(ot, R_EAX, t0);
5183                tcg_gen_br(label2);
5184                gen_set_label(label1);
5185                gen_op_mov_reg_v(ot, rm, t1);
5186            } else {
5187                /* perform no-op store cycle like physical cpu; must be
5188                   before changing accumulator to ensure idempotency if
5189                   the store faults and the instruction is restarted */
5190                gen_op_st_v(s, ot, t0, a0);
5191                gen_op_mov_reg_v(ot, R_EAX, t0);
5192                tcg_gen_br(label2);
5193                gen_set_label(label1);
5194                gen_op_st_v(s, ot, t1, a0);
5195            }
5196            gen_set_label(label2);
5197            tcg_gen_mov_tl(cpu_cc_src, t0);
5198            tcg_gen_mov_tl(cpu_cc_srcT, t2);
5199            tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
5200            set_cc_op(s, CC_OP_SUBB + ot);
5201            tcg_temp_free(t0);
5202            tcg_temp_free(t1);
5203            tcg_temp_free(t2);
5204            tcg_temp_free(a0);
5205        }
5206        break;
5207    case 0x1c7: /* cmpxchg8b */
5208        modrm = cpu_ldub_code(env, s->pc++);
5209        mod = (modrm >> 6) & 3;
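        /* cmpxchg8b/16b requires a memory operand and modrm /1 */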
5210        if ((mod == 3) || ((modrm & 0x38) != 0x8))
5211            goto illegal_op;
5212#ifdef TARGET_X86_64
5213        if (dflag == MO_64) {
5214            if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
5215                goto illegal_op;
5216            gen_jmp_im(pc_start - s->cs_base);
5217            gen_update_cc_op(s);
5218            gen_lea_modrm(env, s, modrm);
5219            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
5220        } else
5221#endif
5222        {
5223            if (!(s->cpuid_features & CPUID_CX8))
5224                goto illegal_op;
5225            gen_jmp_im(pc_start - s->cs_base);
5226            gen_update_cc_op(s);
5227            gen_lea_modrm(env, s, modrm);
5228            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
5229        }
5230        set_cc_op(s, CC_OP_EFLAGS);
5231        break;
5232
5233        /**************************/
5234        /* push/pop */
5235    case 0x50 ... 0x57: /* push */
5236        gen_op_mov_v_reg(MO_32, cpu_T[0], (b & 7) | REX_B(s));
5237        gen_push_v(s, cpu_T[0]);
5238        break;
5239    case 0x58 ... 0x5f: /* pop */
5240        ot = gen_pop_T0(s);
5241        /* NOTE: order is important for pop %sp */
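        /* gen_pop_update first, so that a pop into %sp/%esp ends with
           the popped value, not the incremented stack pointer.  */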
5242        gen_pop_update(s, ot);
5243        gen_op_mov_reg_v(ot, (b & 7) | REX_B(s), cpu_T[0]);
5244        break;
5245    case 0x60: /* pusha */
5246        if (CODE64(s))
5247            goto illegal_op;
5248        gen_pusha(s);
5249        break;
5250    case 0x61: /* popa */
5251        if (CODE64(s))
5252            goto illegal_op;
5253        gen_popa(s);
5254        break;
5255    case 0x68: /* push Iv */
5256    case 0x6a:
5257        ot = mo_pushpop(s, dflag);
5258        if (b == 0x68)
5259            val = insn_get(env, s, ot);
5260        else
5261            val = (int8_t)insn_get(env, s, MO_8);
5262        tcg_gen_movi_tl(cpu_T[0], val);
5263        gen_push_v(s, cpu_T[0]);
5264        break;
5265    case 0x8f: /* pop Ev */
5266        modrm = cpu_ldub_code(env, s->pc++);
5267        mod = (modrm >> 6) & 3;
5268        ot = gen_pop_T0(s);
5269        if (mod == 3) {
5270            /* NOTE: order is important for pop %sp */
5271            gen_pop_update(s, ot);
5272            rm = (modrm & 7) | REX_B(s);
5273            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
5274        } else {
5275            /* NOTE: order is important too for MMU exceptions */
5276            s->popl_esp_hack = 1 << ot;
5277            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5278            s->popl_esp_hack = 0;
5279            gen_pop_update(s, ot);
5280        }
5281        break;
5282    case 0xc8: /* enter */
5283        {
5284            int level;
5285            val = cpu_lduw_code(env, s->pc);
5286            s->pc += 2;
5287            level = cpu_ldub_code(env, s->pc++);
5288            gen_enter(s, val, level);
5289        }
5290        break;
5291    case 0xc9: /* leave */
5292        /* XXX: exception not precise (ESP is updated before potential exception) */
5293        if (CODE64(s)) {
5294            gen_op_mov_v_reg(MO_64, cpu_T[0], R_EBP);
5295            gen_op_mov_reg_v(MO_64, R_ESP, cpu_T[0]);
5296        } else if (s->ss32) {
5297            gen_op_mov_v_reg(MO_32, cpu_T[0], R_EBP);
5298            gen_op_mov_reg_v(MO_32, R_ESP, cpu_T[0]);
5299        } else {
5300            gen_op_mov_v_reg(MO_16, cpu_T[0], R_EBP);
5301            gen_op_mov_reg_v(MO_16, R_ESP, cpu_T[0]);
5302        }
5303        ot = gen_pop_T0(s);
5304        gen_op_mov_reg_v(ot, R_EBP, cpu_T[0]);
5305        gen_pop_update(s, ot);
5306        break;
5307    case 0x06: /* push es */
5308    case 0x0e: /* push cs */
5309    case 0x16: /* push ss */
5310    case 0x1e: /* push ds */
5311        if (CODE64(s))
5312            goto illegal_op;
5313        gen_op_movl_T0_seg(b >> 3);
5314        gen_push_v(s, cpu_T[0]);
5315        break;
5316    case 0x1a0: /* push fs */
5317    case 0x1a8: /* push gs */
5318        gen_op_movl_T0_seg((b >> 3) & 7);
5319        gen_push_v(s, cpu_T[0]);
5320        break;
5321    case 0x07: /* pop es */
5322    case 0x17: /* pop ss */
5323    case 0x1f: /* pop ds */
5324        if (CODE64(s))
5325            goto illegal_op;
5326        reg = b >> 3;
5327        ot = gen_pop_T0(s);
5328        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
5329        gen_pop_update(s, ot);
5330        if (reg == R_SS) {
5331            /* if reg == SS, inhibit interrupts/trace. */
5332            /* If several instructions disable interrupts, only the
5333               _first_ does it */
5334            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
5335                gen_helper_set_inhibit_irq(cpu_env);
5336            s->tf = 0;
5337        }
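        /* gen_movl_seg_T0 sets is_jmp when the segment load may change
           addseg or ss32, so the TB must end here.  */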
5338        if (s->is_jmp) {
5339            gen_jmp_im(s->pc - s->cs_base);
5340            gen_eob(s);
5341        }
5342        break;
5343    case 0x1a1: /* pop fs */
5344    case 0x1a9: /* pop gs */
5345        ot = gen_pop_T0(s);
5346        gen_movl_seg_T0(s, (b >> 3) & 7, pc_start - s->cs_base);
5347        gen_pop_update(s, ot);
5348        if (s->is_jmp) {
5349            gen_jmp_im(s->pc - s->cs_base);
5350            gen_eob(s);
5351        }
5352        break;
5353
5354        /**************************/
5355        /* mov */
5356    case 0x88:
5357    case 0x89: /* mov Ev, Gv */
5358        ot = mo_b_d(b, dflag);
5359        modrm = cpu_ldub_code(env, s->pc++);
5360        reg = ((modrm >> 3) & 7) | rex_r;
5361
5362        /* generate a generic store */
5363        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5364        break;
5365    case 0xc6:
5366    case 0xc7: /* mov Ev, Iv */
5367        ot = mo_b_d(b, dflag);
5368        modrm = cpu_ldub_code(env, s->pc++);
5369        mod = (modrm >> 6) & 3;
5370        if (mod != 3) {
5371            s->rip_offset = insn_const_size(ot);
5372            gen_lea_modrm(env, s, modrm);
5373        }
5374        val = insn_get(env, s, ot);
5375        tcg_gen_movi_tl(cpu_T[0], val);
5376        if (mod != 3) {
5377            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5378        } else {
5379            gen_op_mov_reg_v(ot, (modrm & 7) | REX_B(s), cpu_T[0]);
5380        }
5381        break;
5382    case 0x8a:
5383    case 0x8b: /* mov Gv, Ev */
5384        ot = mo_b_d(b, dflag);
5385        modrm = cpu_ldub_code(env, s->pc++);
5386        reg = ((modrm >> 3) & 7) | rex_r;
5387
5388        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5389        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
5390        break;
5391    case 0x8e: /* mov seg, Ew */
5392        modrm = cpu_ldub_code(env, s->pc++);
5393        reg = (modrm >> 3) & 7;
5394        if (reg >= 6 || reg == R_CS)
5395            goto illegal_op;
5396        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5397        gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
5398        if (reg == R_SS) {
5399            /* if reg == SS, inhibit interrupts/trace */
5400            /* If several instructions disable interrupts, only the
5401               _first_ does it */
5402            if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
5403                gen_helper_set_inhibit_irq(cpu_env);
5404            s->tf = 0;
5405        }
5406        if (s->is_jmp) {
5407            gen_jmp_im(s->pc - s->cs_base);
5408            gen_eob(s);
5409        }
5410        break;
5411    case 0x8c: /* mov Ev, seg */
5412        modrm = cpu_ldub_code(env, s->pc++);
5413        reg = (modrm >> 3) & 7;
5414        mod = (modrm >> 6) & 3;
5415        if (reg >= 6)
5416            goto illegal_op;
5417        gen_op_movl_T0_seg(reg);
5418        ot = mod == 3 ? dflag : MO_16;
5419        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5420        break;
5421
5422    case 0x1b6: /* movzbS Gv, Eb */
5423    case 0x1b7: /* movzwS Gv, Ew */
5424    case 0x1be: /* movsbS Gv, Eb */
5425    case 0x1bf: /* movswS Gv, Ew */
5426        {
5427            TCGMemOp d_ot;
5428            TCGMemOp s_ot;
5429
5430            /* d_ot is the size of destination */
5431            d_ot = dflag;
5432            /* ot is the size of source */
5433            ot = (b & 1) + MO_8;
5434            /* s_ot is the sign+size of source */
5435            s_ot = b & 8 ? MO_SIGN | ot : ot;
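            /* bit 3 of the opcode separates movs (0x1be/0x1bf) from
               movz (0x1b6/0x1b7) */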
5436
5437            modrm = cpu_ldub_code(env, s->pc++);
5438            reg = ((modrm >> 3) & 7) | rex_r;
5439            mod = (modrm >> 6) & 3;
5440            rm = (modrm & 7) | REX_B(s);
5441
5442            if (mod == 3) {
5443                gen_op_mov_v_reg(ot, cpu_T[0], rm);
5444                switch (s_ot) {
5445                case MO_UB:
5446                    tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
5447                    break;
5448                case MO_SB:
5449                    tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
5450                    break;
5451                case MO_UW:
5452                    tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
5453                    break;
5454                default:
5455                case MO_SW:
5456                    tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
5457                    break;
5458                }
5459                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
5460            } else {
5461                gen_lea_modrm(env, s, modrm);
5462                gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
5463                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
5464            }
5465        }
5466        break;
5467
5468    case 0x8d: /* lea */
5469        ot = dflag;
5470        modrm = cpu_ldub_code(env, s->pc++);
5471        mod = (modrm >> 6) & 3;
5472        if (mod == 3)
5473            goto illegal_op;
5474        reg = ((modrm >> 3) & 7) | rex_r;
5475        /* we must ensure that no segment is added */
5476        s->override = -1;
5477        val = s->addseg;
5478        s->addseg = 0;
5479        gen_lea_modrm(env, s, modrm);
5480        s->addseg = val;
5481        gen_op_mov_reg_v(ot, reg, cpu_A0);
5482        break;
5483
5484    case 0xa0: /* mov EAX, Ov */
5485    case 0xa1:
5486    case 0xa2: /* mov Ov, EAX */
5487    case 0xa3:
5488        {
5489            target_ulong offset_addr;
5490
5491            ot = mo_b_d(b, dflag);
5492            switch (s->aflag) {
5493#ifdef TARGET_X86_64
5494            case MO_64:
5495                offset_addr = cpu_ldq_code(env, s->pc);
5496                s->pc += 8;
5497                break;
5498#endif
5499            default:
5500                offset_addr = insn_get(env, s, s->aflag);
5501                break;
5502            }
5503            tcg_gen_movi_tl(cpu_A0, offset_addr);
5504            gen_add_A0_ds_seg(s);
5505            if ((b & 2) == 0) {
5506                gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
5507                gen_op_mov_reg_v(ot, R_EAX, cpu_T[0]);
5508            } else {
5509                gen_op_mov_v_reg(ot, cpu_T[0], R_EAX);
5510                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5511            }
5512        }
5513        break;
5514    case 0xd7: /* xlat */
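        /* AL = [seg:rBX + zero-extended AL], address truncated to aflag */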
5515        tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EBX]);
5516        tcg_gen_ext8u_tl(cpu_T[0], cpu_regs[R_EAX]);
5517        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_T[0]);
5518        gen_extu(s->aflag, cpu_A0);
5519        gen_add_A0_ds_seg(s);
5520        gen_op_ld_v(s, MO_8, cpu_T[0], cpu_A0);
5521        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
5522        break;
5523    case 0xb0 ... 0xb7: /* mov R, Ib */
5524        val = insn_get(env, s, MO_8);
5525        tcg_gen_movi_tl(cpu_T[0], val);
5526        gen_op_mov_reg_v(MO_8, (b & 7) | REX_B(s), cpu_T[0]);
5527        break;
5528    case 0xb8 ... 0xbf: /* mov R, Iv */
5529#ifdef TARGET_X86_64
5530        if (dflag == MO_64) {
5531            uint64_t tmp;
5532            /* 64 bit case */
5533            tmp = cpu_ldq_code(env, s->pc);
5534            s->pc += 8;
5535            reg = (b & 7) | REX_B(s);
5536            tcg_gen_movi_tl(cpu_T[0], tmp);
5537            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
5538        } else
5539#endif
5540        {
5541            ot = dflag;
5542            val = insn_get(env, s, ot);
5543            reg = (b & 7) | REX_B(s);
5544            tcg_gen_movi_tl(cpu_T[0], val);
5545            gen_op_mov_reg_v(ot, reg, cpu_T[0]);
5546        }
5547        break;
5548
5549    case 0x91 ... 0x97: /* xchg R, EAX */
5550    do_xchg_reg_eax:
5551        ot = dflag;
5552        reg = (b & 7) | REX_B(s);
5553        rm = R_EAX;
5554        goto do_xchg_reg;
5555    case 0x86:
5556    case 0x87: /* xchg Ev, Gv */
5557        ot = mo_b_d(b, dflag);
5558        modrm = cpu_ldub_code(env, s->pc++);
5559        reg = ((modrm >> 3) & 7) | rex_r;
5560        mod = (modrm >> 6) & 3;
5561        if (mod == 3) {
5562            rm = (modrm & 7) | REX_B(s);
5563        do_xchg_reg:
5564            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5565            gen_op_mov_v_reg(ot, cpu_T[1], rm);
5566            gen_op_mov_reg_v(ot, rm, cpu_T[0]);
5567            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5568        } else {
5569            gen_lea_modrm(env, s, modrm);
5570            gen_op_mov_v_reg(ot, cpu_T[0], reg);
5571            /* for xchg, lock is implicit */
5572            if (!(prefixes & PREFIX_LOCK))
5573                gen_helper_lock();
5574            gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
5575            gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
5576            if (!(prefixes & PREFIX_LOCK))
5577                gen_helper_unlock();
5578            gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5579        }
5580        break;
5581    case 0xc4: /* les Gv */
5582        /* In CODE64 this is VEX3; see above.  */
5583        op = R_ES;
5584        goto do_lxx;
5585    case 0xc5: /* lds Gv */
5586        /* In CODE64 this is VEX2; see above.  */
5587        op = R_DS;
5588        goto do_lxx;
5589    case 0x1b2: /* lss Gv */
5590        op = R_SS;
5591        goto do_lxx;
5592    case 0x1b4: /* lfs Gv */
5593        op = R_FS;
5594        goto do_lxx;
5595    case 0x1b5: /* lgs Gv */
5596        op = R_GS;
5597    do_lxx:
5598        ot = dflag != MO_16 ? MO_32 : MO_16;
5599        modrm = cpu_ldub_code(env, s->pc++);
5600        reg = ((modrm >> 3) & 7) | rex_r;
5601        mod = (modrm >> 6) & 3;
5602        if (mod == 3)
5603            goto illegal_op;
5604        gen_lea_modrm(env, s, modrm);
5605        gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
5606        gen_add_A0_im(s, 1 << ot);
5607        /* load the segment first to handle exceptions properly */
5608        gen_op_ld_v(s, MO_16, cpu_T[0], cpu_A0);
5609        gen_movl_seg_T0(s, op, pc_start - s->cs_base);
5610        /* then put the data */
5611        gen_op_mov_reg_v(ot, reg, cpu_T[1]);
5612        if (s->is_jmp) {
5613            gen_jmp_im(s->pc - s->cs_base);
5614            gen_eob(s);
5615        }
5616        break;
5617
5618        /************************/
5619        /* shifts */
5620    case 0xc0:
5621    case 0xc1:
5622        /* shift Ev,Ib */
5623        shift = 2;
5624    grp2:
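        /* shift selects the count operand: 0 = CL, 1 = the constant 1,
           2 = an immediate byte */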
5625        {
5626            ot = mo_b_d(b, dflag);
5627            modrm = cpu_ldub_code(env, s->pc++);
5628            mod = (modrm >> 6) & 3;
5629            op = (modrm >> 3) & 7;
5630
5631            if (mod != 3) {
5632                if (shift == 2) {
5633                    s->rip_offset = 1;
5634                }
5635                gen_lea_modrm(env, s, modrm);
5636                opreg = OR_TMP0;
5637            } else {
5638                opreg = (modrm & 7) | REX_B(s);
5639            }
5640
5641            /* simpler op */
5642            if (shift == 0) {
5643                gen_shift(s, op, ot, opreg, OR_ECX);
5644            } else {
5645                if (shift == 2) {
5646                    shift = cpu_ldub_code(env, s->pc++);
5647                }
5648                gen_shifti(s, op, ot, opreg, shift);
5649            }
5650        }
5651        break;
5652    case 0xd0:
5653    case 0xd1:
5654        /* shift Ev,1 */
5655        shift = 1;
5656        goto grp2;
5657    case 0xd2:
5658    case 0xd3:
5659        /* shift Ev,cl */
5660        shift = 0;
5661        goto grp2;
5662
5663    case 0x1a4: /* shld imm */
5664        op = 0;
5665        shift = 1;
5666        goto do_shiftd;
5667    case 0x1a5: /* shld cl */
5668        op = 0;
5669        shift = 0;
5670        goto do_shiftd;
5671    case 0x1ac: /* shrd imm */
5672        op = 1;
5673        shift = 1;
5674        goto do_shiftd;
5675    case 0x1ad: /* shrd cl */
5676        op = 1;
5677        shift = 0;
5678    do_shiftd:
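        /* op: 0 = shld, 1 = shrd; shift: 1 = imm8 count, 0 = count in CL */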
5679        ot = dflag;
5680        modrm = cpu_ldub_code(env, s->pc++);
5681        mod = (modrm >> 6) & 3;
5682        rm = (modrm & 7) | REX_B(s);
5683        reg = ((modrm >> 3) & 7) | rex_r;
5684        if (mod != 3) {
5685            gen_lea_modrm(env, s, modrm);
5686            opreg = OR_TMP0;
5687        } else {
5688            opreg = rm;
5689        }
5690        gen_op_mov_v_reg(ot, cpu_T[1], reg);
5691
5692        if (shift) {
5693            TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
5694            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5695            tcg_temp_free(imm);
5696        } else {
5697            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5698        }
5699        break;
5700
5701        /************************/
5702        /* floats */
5703    case 0xd8 ... 0xdf:
5704        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5705            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5706            /* XXX: what to do if illegal op? */
5707            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5708            break;
5709        }
5710        modrm = cpu_ldub_code(env, s->pc++);
5711        mod = (modrm >> 6) & 3;
5712        rm = modrm & 7;
5713        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
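        /* op packs the low 3 bits of the d8..df opcode with modrm.reg,
           a 6-bit index identifying the x87 operation below */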
5714        if (mod != 3) {
5715            /* memory op */
5716            gen_lea_modrm(env, s, modrm);
5717            switch(op) {
5718            case 0x00 ... 0x07: /* fxxxs */
5719            case 0x10 ... 0x17: /* fixxxl */
5720            case 0x20 ... 0x27: /* fxxxl */
5721            case 0x30 ... 0x37: /* fixxx */
5722                {
5723                    int op1;
5724                    op1 = op & 7;
5725
5726                    switch(op >> 4) {
5727                    case 0:
5728                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5729                                            s->mem_index, MO_LEUL);
5730                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
5731                        break;
5732                    case 1:
5733                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5734                                            s->mem_index, MO_LEUL);
5735                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5736                        break;
5737                    case 2:
5738                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5739                                            s->mem_index, MO_LEQ);
5740                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
5741                        break;
5742                    case 3:
5743                    default:
5744                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5745                                            s->mem_index, MO_LESW);
5746                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
5747                        break;
5748                    }
5749
5750                    gen_helper_fp_arith_ST0_FT0(op1);
5751                    if (op1 == 3) {
5752                        /* fcomp needs pop */
5753                        gen_helper_fpop(cpu_env);
5754                    }
5755                }
5756                break;
5757            case 0x08: /* flds */
5758            case 0x0a: /* fsts */
5759            case 0x0b: /* fstps */
5760            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5761            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5762            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5763                switch(op & 7) {
5764                case 0:
5765                    switch(op >> 4) {
5766                    case 0:
5767                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5768                                            s->mem_index, MO_LEUL);
5769                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
5770                        break;
5771                    case 1:
5772                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5773                                            s->mem_index, MO_LEUL);
5774                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5775                        break;
5776                    case 2:
5777                        tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0,
5778                                            s->mem_index, MO_LEQ);
5779                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
5780                        break;
5781                    case 3:
5782                    default:
5783                        tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5784                                            s->mem_index, MO_LESW);
5785                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
5786                        break;
5787                    }
5788                    break;
5789                case 1:
5790                    /* XXX: the corresponding CPUID bit (SSE3, for fisttp) must be tested! */
5791                    switch(op >> 4) {
5792                    case 1:
5793                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
5794                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5795                                            s->mem_index, MO_LEUL);
5796                        break;
5797                    case 2:
5798                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
5799                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5800                                            s->mem_index, MO_LEQ);
5801                        break;
5802                    case 3:
5803                    default:
5804                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
5805                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5806                                            s->mem_index, MO_LEUW);
5807                        break;
5808                    }
5809                    gen_helper_fpop(cpu_env);
5810                    break;
5811                default:
5812                    switch(op >> 4) {
5813                    case 0:
5814                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
5815                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5816                                            s->mem_index, MO_LEUL);
5817                        break;
5818                    case 1:
5819                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
5820                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5821                                            s->mem_index, MO_LEUL);
5822                        break;
5823                    case 2:
5824                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
5825                        tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0,
5826                                            s->mem_index, MO_LEQ);
5827                        break;
5828                    case 3:
5829                    default:
5830                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
5831                        tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5832                                            s->mem_index, MO_LEUW);
5833                        break;
5834                    }
5835                    if ((op & 7) == 3)
5836                        gen_helper_fpop(cpu_env);
5837                    break;
5838                }
5839                break;
5840            case 0x0c: /* fldenv mem */
5841                gen_update_cc_op(s);
5842                gen_jmp_im(pc_start - s->cs_base);
5843                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5844                break;
5845            case 0x0d: /* fldcw mem */
5846                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
5847                                    s->mem_index, MO_LEUW);
5848                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
5849                break;
5850            case 0x0e: /* fnstenv mem */
5851                gen_update_cc_op(s);
5852                gen_jmp_im(pc_start - s->cs_base);
5853                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5854                break;
5855            case 0x0f: /* fnstcw mem */
5856                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
5857                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5858                                    s->mem_index, MO_LEUW);
5859                break;
5860            case 0x1d: /* fldt mem */
5861                gen_update_cc_op(s);
5862                gen_jmp_im(pc_start - s->cs_base);
5863                gen_helper_fldt_ST0(cpu_env, cpu_A0);
5864                break;
5865            case 0x1f: /* fstpt mem */
5866                gen_update_cc_op(s);
5867                gen_jmp_im(pc_start - s->cs_base);
5868                gen_helper_fstt_ST0(cpu_env, cpu_A0);
5869                gen_helper_fpop(cpu_env);
5870                break;
5871            case 0x2c: /* frstor mem */
5872                gen_update_cc_op(s);
5873                gen_jmp_im(pc_start - s->cs_base);
5874                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5875                break;
5876            case 0x2e: /* fnsave mem */
5877                gen_update_cc_op(s);
5878                gen_jmp_im(pc_start - s->cs_base);
5879                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
5880                break;
5881            case 0x2f: /* fnstsw mem */
5882                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
5883                tcg_gen_qemu_st_i32(cpu_tmp2_i32, cpu_A0,
5884                                    s->mem_index, MO_LEUW);
5885                break;
5886            case 0x3c: /* fbld */
5887                gen_update_cc_op(s);
5888                gen_jmp_im(pc_start - s->cs_base);
5889                gen_helper_fbld_ST0(cpu_env, cpu_A0);
5890                break;
5891            case 0x3e: /* fbstp */
5892                gen_update_cc_op(s);
5893                gen_jmp_im(pc_start - s->cs_base);
5894                gen_helper_fbst_ST0(cpu_env, cpu_A0);
5895                gen_helper_fpop(cpu_env);
5896                break;
5897            case 0x3d: /* fildll */
5898                tcg_gen_qemu_ld_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5899                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
5900                break;
5901            case 0x3f: /* fistpll */
5902                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
5903                tcg_gen_qemu_st_i64(cpu_tmp1_i64, cpu_A0, s->mem_index, MO_LEQ);
5904                gen_helper_fpop(cpu_env);
5905                break;
5906            default:
5907                goto illegal_op;
5908            }
5909        } else {
5910            /* register float ops */
5911            opreg = rm;
5912
5913            switch(op) {
5914            case 0x08: /* fld sti */
5915                gen_helper_fpush(cpu_env);
5916                gen_helper_fmov_ST0_STN(cpu_env,
5917                                        tcg_const_i32((opreg + 1) & 7));
5918                break;
5919            case 0x09: /* fxchg sti */
5920            case 0x29: /* fxchg4 sti, undocumented op */
5921            case 0x39: /* fxchg7 sti, undocumented op */
5922                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
5923                break;
5924            case 0x0a: /* grp d9/2 */
5925                switch(rm) {
5926                case 0: /* fnop */
5927                    /* check exceptions (FreeBSD FPU probe) */
5928                    gen_update_cc_op(s);
5929                    gen_jmp_im(pc_start - s->cs_base);
5930                    gen_helper_fwait(cpu_env);
5931                    break;
5932                default:
5933                    goto illegal_op;
5934                }
5935                break;
5936            case 0x0c: /* grp d9/4 */
5937                switch(rm) {
5938                case 0: /* fchs */
5939                    gen_helper_fchs_ST0(cpu_env);
5940                    break;
5941                case 1: /* fabs */
5942                    gen_helper_fabs_ST0(cpu_env);
5943                    break;
5944                case 4: /* ftst */
5945                    gen_helper_fldz_FT0(cpu_env);
5946                    gen_helper_fcom_ST0_FT0(cpu_env);
5947                    break;
5948                case 5: /* fxam */
5949                    gen_helper_fxam_ST0(cpu_env);
5950                    break;
5951                default:
5952                    goto illegal_op;
5953                }
5954                break;
5955            case 0x0d: /* grp d9/5 */
5956                {
5957                    switch(rm) {
5958                    case 0:
5959                        gen_helper_fpush(cpu_env);
5960                        gen_helper_fld1_ST0(cpu_env);
5961                        break;
5962                    case 1:
5963                        gen_helper_fpush(cpu_env);
5964                        gen_helper_fldl2t_ST0(cpu_env);
5965                        break;
5966                    case 2:
5967                        gen_helper_fpush(cpu_env);
5968                        gen_helper_fldl2e_ST0(cpu_env);
5969                        break;
5970                    case 3:
5971                        gen_helper_fpush(cpu_env);
5972                        gen_helper_fldpi_ST0(cpu_env);
5973                        break;
5974                    case 4:
5975                        gen_helper_fpush(cpu_env);
5976                        gen_helper_fldlg2_ST0(cpu_env);
5977                        break;
5978                    case 5:
5979                        gen_helper_fpush(cpu_env);
5980                        gen_helper_fldln2_ST0(cpu_env);
5981                        break;
5982                    case 6:
5983                        gen_helper_fpush(cpu_env);
5984                        gen_helper_fldz_ST0(cpu_env);
5985                        break;
5986                    default:
5987                        goto illegal_op;
5988                    }
5989                }
5990                break;
5991            case 0x0e: /* grp d9/6 */
5992                switch(rm) {
5993                case 0: /* f2xm1 */
5994                    gen_helper_f2xm1(cpu_env);
5995                    break;
5996                case 1: /* fyl2x */
5997                    gen_helper_fyl2x(cpu_env);
5998                    break;
5999                case 2: /* fptan */
6000                    gen_helper_fptan(cpu_env);
6001                    break;
6002                case 3: /* fpatan */
6003                    gen_helper_fpatan(cpu_env);
6004                    break;
6005                case 4: /* fxtract */
6006                    gen_helper_fxtract(cpu_env);
6007                    break;
6008                case 5: /* fprem1 */
6009                    gen_helper_fprem1(cpu_env);
6010                    break;
6011                case 6: /* fdecstp */
6012                    gen_helper_fdecstp(cpu_env);
6013                    break;
6014                default:
6015                case 7: /* fincstp */
6016                    gen_helper_fincstp(cpu_env);
6017                    break;
6018                }
6019                break;
6020            case 0x0f: /* grp d9/7 */
6021                switch(rm) {
6022                case 0: /* fprem */
6023                    gen_helper_fprem(cpu_env);
6024                    break;
6025                case 1: /* fyl2xp1 */
6026                    gen_helper_fyl2xp1(cpu_env);
6027                    break;
6028                case 2: /* fsqrt */
6029                    gen_helper_fsqrt(cpu_env);
6030                    break;
6031                case 3: /* fsincos */
6032                    gen_helper_fsincos(cpu_env);
6033                    break;
6034                case 5: /* fscale */
6035                    gen_helper_fscale(cpu_env);
6036                    break;
6037                case 4: /* frndint */
6038                    gen_helper_frndint(cpu_env);
6039                    break;
6040                case 6: /* fsin */
6041                    gen_helper_fsin(cpu_env);
6042                    break;
6043                default:
6044                case 7: /* fcos */
6045                    gen_helper_fcos(cpu_env);
6046                    break;
6047                }
6048                break;
6049            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6050            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6051            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
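                /* op & 7 indexes the arithmetic op (fadd, fmul, fsub, fsubr,
                   fdiv, fdivr; the fcom encodings are handled separately
                   below).  The 0x2x forms write ST(i) instead of ST(0), and
                   the 0x3x forms also pop the stack afterwards. */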
6052                {
6053                    int op1;
6054
6055                    op1 = op & 7;
6056                    if (op >= 0x20) {
6057                        gen_helper_fp_arith_STN_ST0(op1, opreg);
6058                        if (op >= 0x30)
6059                            gen_helper_fpop(cpu_env);
6060                    } else {
6061                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6062                        gen_helper_fp_arith_ST0_FT0(op1);
6063                    }
6064                }
6065                break;
6066            case 0x02: /* fcom */
6067            case 0x22: /* fcom2, undocumented op */
6068                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6069                gen_helper_fcom_ST0_FT0(cpu_env);
6070                break;
6071            case 0x03: /* fcomp */
6072            case 0x23: /* fcomp3, undocumented op */
6073            case 0x32: /* fcomp5, undocumented op */
6074                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6075                gen_helper_fcom_ST0_FT0(cpu_env);
6076                gen_helper_fpop(cpu_env);
6077                break;
6078            case 0x15: /* da/5 */
6079                switch(rm) {
6080                case 1: /* fucompp */
6081                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6082                    gen_helper_fucom_ST0_FT0(cpu_env);
6083                    gen_helper_fpop(cpu_env);
6084                    gen_helper_fpop(cpu_env);
6085                    break;
6086                default:
6087                    goto illegal_op;
6088                }
6089                break;
6090            case 0x1c: /* db/4 */
6091                switch(rm) {
6092                case 0: /* feni (287 only, just do nop here) */
6093                    break;
6094                case 1: /* fdisi (287 only, just do nop here) */
6095                    break;
6096                case 2: /* fclex */
6097                    gen_helper_fclex(cpu_env);
6098                    break;
6099                case 3: /* fninit */
6100                    gen_helper_fninit(cpu_env);
6101                    break;
6102                case 4: /* fsetpm (287 only, just do nop here) */
6103                    break;
6104                default:
6105                    goto illegal_op;
6106                }
6107                break;
6108            case 0x1d: /* fucomi */
6109                if (!(s->cpuid_features & CPUID_CMOV)) {
6110                    goto illegal_op;
6111                }
6112                gen_update_cc_op(s);
6113                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6114                gen_helper_fucomi_ST0_FT0(cpu_env);
6115                set_cc_op(s, CC_OP_EFLAGS);
6116                break;
6117            case 0x1e: /* fcomi */
6118                if (!(s->cpuid_features & CPUID_CMOV)) {
6119                    goto illegal_op;
6120                }
6121                gen_update_cc_op(s);
6122                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6123                gen_helper_fcomi_ST0_FT0(cpu_env);
6124                set_cc_op(s, CC_OP_EFLAGS);
6125                break;
6126            case 0x28: /* ffree sti */
6127                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6128                break;
6129            case 0x2a: /* fst sti */
6130                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6131                break;
6132            case 0x2b: /* fstp sti */
6133            case 0x0b: /* fstp1 sti, undocumented op */
6134            case 0x3a: /* fstp8 sti, undocumented op */
6135            case 0x3b: /* fstp9 sti, undocumented op */
6136                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6137                gen_helper_fpop(cpu_env);
6138                break;
6139            case 0x2c: /* fucom st(i) */
6140                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6141                gen_helper_fucom_ST0_FT0(cpu_env);
6142                break;
6143            case 0x2d: /* fucomp st(i) */
6144                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6145                gen_helper_fucom_ST0_FT0(cpu_env);
6146                gen_helper_fpop(cpu_env);
6147                break;
6148            case 0x33: /* de/3 */
6149                switch(rm) {
6150                case 1: /* fcompp */
6151                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6152                    gen_helper_fcom_ST0_FT0(cpu_env);
6153                    gen_helper_fpop(cpu_env);
6154                    gen_helper_fpop(cpu_env);
6155                    break;
6156                default:
6157                    goto illegal_op;
6158                }
6159                break;
6160            case 0x38: /* ffreep sti, undocumented op */
6161                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6162                gen_helper_fpop(cpu_env);
6163                break;
6164            case 0x3c: /* df/4 */
6165                switch(rm) {
6166                case 0: /* fnstsw ax */
6167                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
6168                    tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
6169                    gen_op_mov_reg_v(MO_16, R_EAX, cpu_T[0]);
6170                    break;
6171                default:
6172                    goto illegal_op;
6173                }
6174                break;
6175            case 0x3d: /* fucomip */
6176                if (!(s->cpuid_features & CPUID_CMOV)) {
6177                    goto illegal_op;
6178                }
6179                gen_update_cc_op(s);
6180                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6181                gen_helper_fucomi_ST0_FT0(cpu_env);
6182                gen_helper_fpop(cpu_env);
6183                set_cc_op(s, CC_OP_EFLAGS);
6184                break;
6185            case 0x3e: /* fcomip */
6186                if (!(s->cpuid_features & CPUID_CMOV)) {
6187                    goto illegal_op;
6188                }
6189                gen_update_cc_op(s);
6190                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6191                gen_helper_fcomi_ST0_FT0(cpu_env);
6192                gen_helper_fpop(cpu_env);
6193                set_cc_op(s, CC_OP_EFLAGS);
6194                break;
6195            case 0x10 ... 0x13: /* fcmovxx */
6196            case 0x18 ... 0x1b:
6197                {
6198                    int op1, l1;
6199                    static const uint8_t fcmov_cc[8] = {
6200                        (JCC_B << 1),
6201                        (JCC_Z << 1),
6202                        (JCC_BE << 1),
6203                        (JCC_P << 1),
6204                    };
6205
6206                    if (!(s->cpuid_features & CPUID_CMOV)) {
6207                        goto illegal_op;
6208                    }
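                        /* Bit 0 of a jcc condition code inverts the test.  We
                           branch over the fmov when the fcmov condition is
                           false, so the DA forms (fcmovb/e/be/u) use the
                           inverted code and the DB forms the plain one. */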
6209                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6210                    l1 = gen_new_label();
6211                    gen_jcc1_noeob(s, op1, l1);
6212                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6213                    gen_set_label(l1);
6214                }
6215                break;
6216            default:
6217                goto illegal_op;
6218            }
6219        }
6220        break;
6221        /************************/
6222        /* string ops */
6223
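        /* The gen_repz_* helpers emit one iteration guarded by an ECX == 0
           test and then loop by jumping back to the insn itself; the plain
           gen_* helpers emit a single iteration. */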
6224    case 0xa4: /* movsS */
6225    case 0xa5:
6226        ot = mo_b_d(b, dflag);
6227        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6228            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6229        } else {
6230            gen_movs(s, ot);
6231        }
6232        break;
6233
6234    case 0xaa: /* stosS */
6235    case 0xab:
6236        ot = mo_b_d(b, dflag);
6237        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6238            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6239        } else {
6240            gen_stos(s, ot);
6241        }
6242        break;
6243    case 0xac: /* lodsS */
6244    case 0xad:
6245        ot = mo_b_d(b, dflag);
6246        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6247            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6248        } else {
6249            gen_lods(s, ot);
6250        }
6251        break;
6252    case 0xae: /* scasS */
6253    case 0xaf:
6254        ot = mo_b_d(b, dflag);
6255        if (prefixes & PREFIX_REPNZ) {
6256            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6257        } else if (prefixes & PREFIX_REPZ) {
6258            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6259        } else {
6260            gen_scas(s, ot);
6261        }
6262        break;
6263
6264    case 0xa6: /* cmpsS */
6265    case 0xa7:
6266        ot = mo_b_d(b, dflag);
6267        if (prefixes & PREFIX_REPNZ) {
6268            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6269        } else if (prefixes & PREFIX_REPZ) {
6270            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6271        } else {
6272            gen_cmps(s, ot);
6273        }
6274        break;
6275    case 0x6c: /* insS */
6276    case 0x6d:
6277        ot = mo_b_d32(b, dflag);
6278        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
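            /* SVM IOIO exit info: bit 0 = direction (IN), bit 2 (the literal
               4 below) = string insn, bit 3 (from svm_is_rep) = REP prefix. */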
6279        gen_check_io(s, ot, pc_start - s->cs_base, 
6280                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6281        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6282            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6283        } else {
6284            gen_ins(s, ot);
6285            if (use_icount) {
6286                gen_jmp(s, s->pc - s->cs_base);
6287            }
6288        }
6289        break;
6290    case 0x6e: /* outsS */
6291    case 0x6f:
6292        ot = mo_b_d32(b, dflag);
6293        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6294        gen_check_io(s, ot, pc_start - s->cs_base,
6295                     svm_is_rep(prefixes) | 4);
6296        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6297            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6298        } else {
6299            gen_outs(s, ot);
6300            if (use_icount) {
6301                gen_jmp(s, s->pc - s->cs_base);
6302            }
6303        }
6304        break;
6305
6306        /************************/
6307        /* port I/O */
6308
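        /* With icount enabled, each I/O access below is bracketed by
           gen_io_start/gen_io_end and the TB is ended right after it, hence
           the gen_jmp to the next insn. */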
6309    case 0xe4:
6310    case 0xe5:
6311        ot = mo_b_d32(b, dflag);
6312        val = cpu_ldub_code(env, s->pc++);
6313        tcg_gen_movi_tl(cpu_T[0], val);
6314        gen_check_io(s, ot, pc_start - s->cs_base,
6315                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6316        if (use_icount)
6317            gen_io_start();
6318        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6319        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
6320        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
6321        if (use_icount) {
6322            gen_io_end();
6323            gen_jmp(s, s->pc - s->cs_base);
6324        }
6325        break;
6326    case 0xe6:
6327    case 0xe7:
6328        ot = mo_b_d32(b, dflag);
6329        val = cpu_ldub_code(env, s->pc++);
6330        tcg_gen_movi_tl(cpu_T[0], val);
6331        gen_check_io(s, ot, pc_start - s->cs_base,
6332                     svm_is_rep(prefixes));
6333        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
6334
6335        if (use_icount)
6336            gen_io_start();
6337        tcg_gen_movi_i32(cpu_tmp2_i32, val);
6338        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
6339        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6340        if (use_icount) {
6341            gen_io_end();
6342            gen_jmp(s, s->pc - s->cs_base);
6343        }
6344        break;
6345    case 0xec:
6346    case 0xed:
6347        ot = mo_b_d32(b, dflag);
6348        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6349        gen_check_io(s, ot, pc_start - s->cs_base,
6350                     SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6351        if (use_icount)
6352            gen_io_start();
6353        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
6354        gen_helper_in_func(ot, cpu_T[1], cpu_tmp2_i32);
6355        gen_op_mov_reg_v(ot, R_EAX, cpu_T[1]);
6356        if (use_icount) {
6357            gen_io_end();
6358            gen_jmp(s, s->pc - s->cs_base);
6359        }
6360        break;
6361    case 0xee:
6362    case 0xef:
6363        ot = mo_b_d32(b, dflag);
6364        tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
6365        gen_check_io(s, ot, pc_start - s->cs_base,
6366                     svm_is_rep(prefixes));
6367        gen_op_mov_v_reg(ot, cpu_T[1], R_EAX);
6368
6369        if (use_icount)
6370            gen_io_start();
6371        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
6372        tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
6373        gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
6374        if (use_icount) {
6375            gen_io_end();
6376            gen_jmp(s, s->pc - s->cs_base);
6377        }
6378        break;
6379
6380        /************************/
6381        /* control */
6382    case 0xc2: /* ret im */
6383        val = cpu_ldsw_code(env, s->pc);
6384        s->pc += 2;
6385        ot = gen_pop_T0(s);
6386        gen_stack_update(s, val + (1 << ot));
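            /* gen_pop_T0 did not adjust ESP: release the return address
               (1 << ot bytes) together with the immediate count. */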
6387        /* Note that gen_pop_T0 uses a zero-extending load.  */
6388        gen_op_jmp_v(cpu_T[0]);
6389        gen_eob(s);
6390        break;
6391    case 0xc3: /* ret */
6392        ot = gen_pop_T0(s);
6393        gen_pop_update(s, ot);
6394        /* Note that gen_pop_T0 uses a zero-extending load.  */
6395        gen_op_jmp_v(cpu_T[0]);
6396        gen_eob(s);
6397        break;
6398    case 0xca: /* lret im */
6399        val = cpu_ldsw_code(env, s->pc);
6400        s->pc += 2;
6401    do_lret:
6402        if (s->pe && !s->vm86) {
6403            gen_update_cc_op(s);
6404            gen_jmp_im(pc_start - s->cs_base);
6405            gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6406                                      tcg_const_i32(val));
6407        } else {
6408            gen_stack_A0(s);
6409            /* pop offset */
6410            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
6411            /* NOTE: keeping EIP updated is not a problem in case of
6412               exception */
6413            gen_op_jmp_v(cpu_T[0]);
6414            /* pop selector */
6415            gen_op_addl_A0_im(1 << dflag);
6416            gen_op_ld_v(s, dflag, cpu_T[0], cpu_A0);
6417            gen_op_movl_seg_T0_vm(R_CS);
6418            /* add stack offset */
6419            gen_stack_update(s, val + (2 << dflag));
6420        }
6421        gen_eob(s);
6422        break;
6423    case 0xcb: /* lret */
6424        val = 0;
6425        goto do_lret;
6426    case 0xcf: /* iret */
6427        gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
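            /* Real mode, and vm86 with IOPL 3, use the simple real-mode
               helper; vm86 with lower IOPL faults; protected mode uses the
               full privilege-checking helper. */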
6428        if (!s->pe) {
6429            /* real mode */
6430            gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6431            set_cc_op(s, CC_OP_EFLAGS);
6432        } else if (s->vm86) {
6433            if (s->iopl != 3) {
6434                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6435            } else {
6436                gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6437                set_cc_op(s, CC_OP_EFLAGS);
6438            }
6439        } else {
6440            gen_update_cc_op(s);
6441            gen_jmp_im(pc_start - s->cs_base);
6442            gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6443                                      tcg_const_i32(s->pc - s->cs_base));
6444            set_cc_op(s, CC_OP_EFLAGS);
6445        }
6446        gen_eob(s);
6447        break;
6448    case 0xe8: /* call im */
6449        {
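                /* Relative near call: target = next EIP + rel16/32, masked
                   to the operand size outside 64-bit mode; the return
                   address is pushed before jumping. */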
6450            if (dflag != MO_16) {
6451                tval = (int32_t)insn_get(env, s, MO_32);
6452            } else {
6453                tval = (int16_t)insn_get(env, s, MO_16);
6454            }
6455            next_eip = s->pc - s->cs_base;
6456            tval += next_eip;
6457            if (dflag == MO_16) {
6458                tval &= 0xffff;
6459            } else if (!CODE64(s)) {
6460                tval &= 0xffffffff;
6461            }
6462            tcg_gen_movi_tl(cpu_T[0], next_eip);
6463            gen_push_v(s, cpu_T[0]);
6464            gen_jmp(s, tval);
6465        }
6466        break;
6467    case 0x9a: /* lcall im */
6468        {
6469            unsigned int selector, offset;
6470
6471            if (CODE64(s))
6472                goto illegal_op;
6473            ot = dflag;
6474            offset = insn_get(env, s, ot);
6475            selector = insn_get(env, s, MO_16);
6476
6477            tcg_gen_movi_tl(cpu_T[0], selector);
6478            tcg_gen_movi_tl(cpu_T[1], offset);
6479        }
6480        goto do_lcall;
6481    case 0xe9: /* jmp im */
6482        if (dflag != MO_16) {
6483            tval = (int32_t)insn_get(env, s, MO_32);
6484        } else {
6485            tval = (int16_t)insn_get(env, s, MO_16);
6486        }
6487        tval += s->pc - s->cs_base;
6488        if (dflag == MO_16) {
6489            tval &= 0xffff;
6490        } else if (!CODE64(s)) {
6491            tval &= 0xffffffff;
6492        }
6493        gen_jmp(s, tval);
6494        break;
6495    case 0xea: /* ljmp im */
6496        {
6497            unsigned int selector, offset;
6498
6499            if (CODE64(s))
6500                goto illegal_op;
6501            ot = dflag;
6502            offset = insn_get(env, s, ot);
6503            selector = insn_get(env, s, MO_16);
6504
6505            tcg_gen_movi_tl(cpu_T[0], selector);
6506            tcg_gen_movi_tl(cpu_T[1], offset);
6507        }
6508        goto do_ljmp;
6509    case 0xeb: /* jmp Jb */
6510        tval = (int8_t)insn_get(env, s, MO_8);
6511        tval += s->pc - s->cs_base;
6512        if (dflag == MO_16) {
6513            tval &= 0xffff;
6514        }
6515        gen_jmp(s, tval);
6516        break;
6517    case 0x70 ... 0x7f: /* jcc Jb */
6518        tval = (int8_t)insn_get(env, s, MO_8);
6519        goto do_jcc;
6520    case 0x180 ... 0x18f: /* jcc Jv */
6521        if (dflag != MO_16) {
6522            tval = (int32_t)insn_get(env, s, MO_32);
6523        } else {
6524            tval = (int16_t)insn_get(env, s, MO_16);
6525        }
6526    do_jcc:
6527        next_eip = s->pc - s->cs_base;
6528        tval += next_eip;
6529        if (dflag == MO_16) {
6530            tval &= 0xffff;
6531        }
6532        gen_jcc(s, b, tval, next_eip);
6533        break;
6534
6535    case 0x190 ... 0x19f: /* setcc Gv */
6536        modrm = cpu_ldub_code(env, s->pc++);
6537        gen_setcc1(s, b, cpu_T[0]);
6538        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6539        break;
6540    case 0x140 ... 0x14f: /* cmov Gv, Ev */
6541        if (!(s->cpuid_features & CPUID_CMOV)) {
6542            goto illegal_op;
6543        }
6544        ot = dflag;
6545        modrm = cpu_ldub_code(env, s->pc++);
6546        reg = ((modrm >> 3) & 7) | rex_r;
6547        gen_cmovcc1(env, s, ot, b, modrm, reg);
6548        break;
6549
6550        /************************/
6551        /* flags */
6552    case 0x9c: /* pushf */
6553        gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6554        if (s->vm86 && s->iopl != 3) {
6555            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6556        } else {
6557            gen_update_cc_op(s);
6558            gen_helper_read_eflags(cpu_T[0], cpu_env);
6559            gen_push_v(s, cpu_T[0]);
6560        }
6561        break;
6562    case 0x9d: /* popf */
6563        gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6564        if (s->vm86 && s->iopl != 3) {
6565            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6566        } else {
6567            ot = gen_pop_T0(s);
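                /* The set of writable flags shrinks with privilege: CPL 0
                   may also change IOPL and IF, CPL <= IOPL may still change
                   IF, anything else only TF/AC/ID/NT.  The 16-bit form masks
                   the update to the low word. */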
6568            if (s->cpl == 0) {
6569                if (dflag != MO_16) {
6570                    gen_helper_write_eflags(cpu_env, cpu_T[0],
6571                                            tcg_const_i32((TF_MASK | AC_MASK |
6572                                                           ID_MASK | NT_MASK |
6573                                                           IF_MASK |
6574                                                           IOPL_MASK)));
6575                } else {
6576                    gen_helper_write_eflags(cpu_env, cpu_T[0],
6577                                            tcg_const_i32((TF_MASK | AC_MASK |
6578                                                           ID_MASK | NT_MASK |
6579                                                           IF_MASK | IOPL_MASK)
6580                                                          & 0xffff));
6581                }
6582            } else {
6583                if (s->cpl <= s->iopl) {
6584                    if (dflag != MO_16) {
6585                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6586                                                tcg_const_i32((TF_MASK |
6587                                                               AC_MASK |
6588                                                               ID_MASK |
6589                                                               NT_MASK |
6590                                                               IF_MASK)));
6591                    } else {
6592                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6593                                                tcg_const_i32((TF_MASK |
6594                                                               AC_MASK |
6595                                                               ID_MASK |
6596                                                               NT_MASK |
6597                                                               IF_MASK)
6598                                                              & 0xffff));
6599                    }
6600                } else {
6601                    if (dflag != MO_16) {
6602                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6603                                           tcg_const_i32((TF_MASK | AC_MASK |
6604                                                          ID_MASK | NT_MASK)));
6605                    } else {
6606                        gen_helper_write_eflags(cpu_env, cpu_T[0],
6607                                           tcg_const_i32((TF_MASK | AC_MASK |
6608                                                          ID_MASK | NT_MASK)
6609                                                         & 0xffff));
6610                    }
6611                }
6612            }
6613            gen_pop_update(s, ot);
6614            set_cc_op(s, CC_OP_EFLAGS);
6615            /* abort translation because TF/AC flag may change */
6616            gen_jmp_im(s->pc - s->cs_base);
6617            gen_eob(s);
6618        }
6619        break;
6620    case 0x9e: /* sahf */
6621        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6622            goto illegal_op;
6623        gen_op_mov_v_reg(MO_8, cpu_T[0], R_AH);
6624        gen_compute_eflags(s);
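            /* SAHF replaces only S/Z/A/P/C: keep O from the computed flags
               and merge the rest from AH. */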
6625        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6626        tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
6627        tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
6628        break;
6629    case 0x9f: /* lahf */
6630        if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6631            goto illegal_op;
6632        gen_compute_eflags(s);
6633        /* Note: gen_compute_eflags() only gives the condition codes */
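            /* Bit 1 of EFLAGS is architecturally always set, hence the
               0x02. */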
6634        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
6635        gen_op_mov_reg_v(MO_8, R_AH, cpu_T[0]);
6636        break;
6637    case 0xf5: /* cmc */
6638        gen_compute_eflags(s);
6639        tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6640        break;
6641    case 0xf8: /* clc */
6642        gen_compute_eflags(s);
6643        tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6644        break;
6645    case 0xf9: /* stc */
6646        gen_compute_eflags(s);
6647        tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6648        break;
6649    case 0xfc: /* cld */
6650        tcg_gen_movi_i32(cpu_tmp2_i32, 1);
6651        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6652        break;
6653    case 0xfd: /* std */
6654        tcg_gen_movi_i32(cpu_tmp2_i32, -1);
6655        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6656        break;
6657
6658        /************************/
6659        /* bit operations */
6660    case 0x1ba: /* bt/bts/btr/btc Gv, im */
6661        ot = dflag;
6662        modrm = cpu_ldub_code(env, s->pc++);
6663        op = (modrm >> 3) & 7;
6664        mod = (modrm >> 6) & 3;
6665        rm = (modrm & 7) | REX_B(s);
6666        if (mod != 3) {
6667            s->rip_offset = 1;
6668            gen_lea_modrm(env, s, modrm);
6669            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
6670        } else {
6671            gen_op_mov_v_reg(ot, cpu_T[0], rm);
6672        }
6673        /* load shift */
6674        val = cpu_ldub_code(env, s->pc++);
6675        tcg_gen_movi_tl(cpu_T[1], val);
6676        if (op < 4)
6677            goto illegal_op;
6678        op -= 4;
6679        goto bt_op;
6680    case 0x1a3: /* bt Gv, Ev */
6681        op = 0;
6682        goto do_btx;
6683    case 0x1ab: /* bts */
6684        op = 1;
6685        goto do_btx;
6686    case 0x1b3: /* btr */
6687        op = 2;
6688        goto do_btx;
6689    case 0x1bb: /* btc */
6690        op = 3;
6691    do_btx:
6692        ot = dflag;
6693        modrm = cpu_ldub_code(env, s->pc++);
6694        reg = ((modrm >> 3) & 7) | rex_r;
6695        mod = (modrm >> 6) & 3;
6696        rm = (modrm & 7) | REX_B(s);
6697        gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
6698        if (mod != 3) {
6699            gen_lea_modrm(env, s, modrm);
6700            /* special case: for a memory operand the signed bit offset
                   also supplies a word displacement, added to A0 below */
6701            gen_exts(ot, cpu_T[1]);
6702            tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
6703            tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot);
6704            tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
6705            gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
6706        } else {
6707            gen_op_mov_v_reg(ot, cpu_T[0], rm);
6708        }
6709    bt_op:
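            /* Reduce the bit offset modulo the operand width; for memory
               operands the word displacement was already folded into A0
               above. */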
6710        tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
6711        switch(op) {
6712        case 0:
6713            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
6714            tcg_gen_movi_tl(cpu_cc_dst, 0);
6715            break;
6716        case 1:
6717            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
6718            tcg_gen_movi_tl(cpu_tmp0, 1);
6719            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
6720            tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
6721            break;
6722        case 2:
6723            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
6724            tcg_gen_movi_tl(cpu_tmp0, 1);
6725            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
6726            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
6727            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
6728            break;
6729        default:
6730        case 3:
6731            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
6732            tcg_gen_movi_tl(cpu_tmp0, 1);
6733            tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
6734            tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
6735            break;
6736        }
6737        set_cc_op(s, CC_OP_SARB + ot);
6738        if (op != 0) {
6739            if (mod != 3) {
6740                gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
6741            } else {
6742                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
6743            }
6744            tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
6745            tcg_gen_movi_tl(cpu_cc_dst, 0);
6746        }
6747        break;
6748    case 0x1bc: /* bsf / tzcnt */
6749    case 0x1bd: /* bsr / lzcnt */
6750        ot = dflag;
6751        modrm = cpu_ldub_code(env, s->pc++);
6752        reg = ((modrm >> 3) & 7) | rex_r;
6753        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6754        gen_extu(ot, cpu_T[0]);
6755
6756        /* Note that lzcnt and tzcnt are in different extensions.  */
6757        if ((prefixes & PREFIX_REPZ)
6758            && (b & 1
6759                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6760                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6761            int size = 8 << ot;
6762            tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
6763            if (b & 1) {
6764                /* For lzcnt, reduce the target_ulong result by the
6765                   number of zeros that we expect to find at the top.  */
6766                gen_helper_clz(cpu_T[0], cpu_T[0]);
6767                tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
6768            } else {
6769                /* For tzcnt, a zero input must return the operand size:
6770                   force all bits outside the operand size to 1.  */
6771                target_ulong mask = (target_ulong)-2 << (size - 1);
6772                tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
6773                gen_helper_ctz(cpu_T[0], cpu_T[0]);
6774            }
6775            /* For lzcnt/tzcnt, C and Z bits are defined and are
6776               related to the result.  */
6777            gen_op_update1_cc();
6778            set_cc_op(s, CC_OP_BMILGB + ot);
6779        } else {
6780            /* For bsr/bsf, only the Z bit is defined and it is related
6781               to the input and not the result.  */
6782            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
6783            set_cc_op(s, CC_OP_LOGICB + ot);
6784            if (b & 1) {
6785                /* For bsr, return the bit index of the first 1 bit,
6786                   not the count of leading zeros.  */
6787                gen_helper_clz(cpu_T[0], cpu_T[0]);
6788                tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
6789            } else {
6790                gen_helper_ctz(cpu_T[0], cpu_T[0]);
6791            }
6792            /* ??? The manual says that the output is undefined when the
6793               input is zero, but real hardware leaves it unchanged, and
6794               real programs appear to depend on that.  */
6795            tcg_gen_movi_tl(cpu_tmp0, 0);
6796            tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
6797                               cpu_regs[reg], cpu_T[0]);
6798        }
6799        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
6800        break;
6801        /************************/
6802        /* bcd */
6803    case 0x27: /* daa */
6804        if (CODE64(s))
6805            goto illegal_op;
6806        gen_update_cc_op(s);
6807        gen_helper_daa(cpu_env);
6808        set_cc_op(s, CC_OP_EFLAGS);
6809        break;
6810    case 0x2f: /* das */
6811        if (CODE64(s))
6812            goto illegal_op;
6813        gen_update_cc_op(s);
6814        gen_helper_das(cpu_env);
6815        set_cc_op(s, CC_OP_EFLAGS);
6816        break;
6817    case 0x37: /* aaa */
6818        if (CODE64(s))
6819            goto illegal_op;
6820        gen_update_cc_op(s);
6821        gen_helper_aaa(cpu_env);
6822        set_cc_op(s, CC_OP_EFLAGS);
6823        break;
6824    case 0x3f: /* aas */
6825        if (CODE64(s))
6826            goto illegal_op;
6827        gen_update_cc_op(s);
6828        gen_helper_aas(cpu_env);
6829        set_cc_op(s, CC_OP_EFLAGS);
6830        break;
6831    case 0xd4: /* aam */
6832        if (CODE64(s))
6833            goto illegal_op;
6834        val = cpu_ldub_code(env, s->pc++);
6835        if (val == 0) {
6836            gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
6837        } else {
6838            gen_helper_aam(cpu_env, tcg_const_i32(val));
6839            set_cc_op(s, CC_OP_LOGICB);
6840        }
6841        break;
6842    case 0xd5: /* aad */
6843        if (CODE64(s))
6844            goto illegal_op;
6845        val = cpu_ldub_code(env, s->pc++);
6846        gen_helper_aad(cpu_env, tcg_const_i32(val));
6847        set_cc_op(s, CC_OP_LOGICB);
6848        break;
6849        /************************/
6850        /* misc */
6851    case 0x90: /* nop */
6852        /* XXX: correct LOCK test for all insns */
6853        if (prefixes & PREFIX_LOCK) {
6854            goto illegal_op;
6855        }
6856        /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
6857        if (REX_B(s)) {
6858            goto do_xchg_reg_eax;
6859        }
6860        if (prefixes & PREFIX_REPZ) {
6861            gen_update_cc_op(s);
6862            gen_jmp_im(pc_start - s->cs_base);
6863            gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
6864            s->is_jmp = DISAS_TB_JUMP;
6865        }
6866        break;
6867    case 0x9b: /* fwait */
6868        if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
6869            (HF_MP_MASK | HF_TS_MASK)) {
6870            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
6871        } else {
6872            gen_update_cc_op(s);
6873            gen_jmp_im(pc_start - s->cs_base);
6874            gen_helper_fwait(cpu_env);
6875        }
6876        break;
6877    case 0xcc: /* int3 */
6878        gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
6879        break;
6880    case 0xcd: /* int N */
6881        val = cpu_ldub_code(env, s->pc++);
6882        if (s->vm86 && s->iopl != 3) {
6883            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6884        } else {
6885            gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
6886        }
6887        break;
6888    case 0xce: /* into */
6889        if (CODE64(s))
6890            goto illegal_op;
6891        gen_update_cc_op(s);
6892        gen_jmp_im(pc_start - s->cs_base);
6893        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
6894        break;
6895#ifdef WANT_ICEBP
6896    case 0xf1: /* icebp (undocumented, exits to external debugger) */
6897        gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
6898#if 1
6899        gen_debug(s, pc_start - s->cs_base);
6900#else
6901        /* start debug */
6902        tb_flush(env);
6903        qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
6904#endif
6905        break;
6906#endif
6907    case 0xfa: /* cli */
6908        if (!s->vm86) {
6909            if (s->cpl <= s->iopl) {
6910                gen_helper_cli(cpu_env);
6911            } else {
6912                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6913            }
6914        } else {
6915            if (s->iopl == 3) {
6916                gen_helper_cli(cpu_env);
6917            } else {
6918                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6919            }
6920        }
6921        break;
6922    case 0xfb: /* sti */
6923        if (!s->vm86) {
6924            if (s->cpl <= s->iopl) {
6925            gen_sti:
6926                gen_helper_sti(cpu_env);
6927                /* interrupts are recognized only after the insn following STI */
6928                /* If several consecutive insns inhibit interrupts, only the
6929                   _first_ one sets the inhibit flag */
6930                if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
6931                    gen_helper_set_inhibit_irq(cpu_env);
6932                /* give a chance to handle pending irqs */
6933                gen_jmp_im(s->pc - s->cs_base);
6934                gen_eob(s);
6935            } else {
6936                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6937            }
6938        } else {
6939            if (s->iopl == 3) {
6940                goto gen_sti;
6941            } else {
6942                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6943            }
6944        }
6945        break;
6946    case 0x62: /* bound */
6947        if (CODE64(s))
6948            goto illegal_op;
6949        ot = dflag;
6950        modrm = cpu_ldub_code(env, s->pc++);
6951        reg = (modrm >> 3) & 7;
6952        mod = (modrm >> 6) & 3;
6953        if (mod == 3)
6954            goto illegal_op;
6955        gen_op_mov_v_reg(ot, cpu_T[0], reg);
6956        gen_lea_modrm(env, s, modrm);
6957        gen_jmp_im(pc_start - s->cs_base);
6958        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
6959        if (ot == MO_16) {
6960            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
6961        } else {
6962            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
6963        }
6964        break;
6965    case 0x1c8 ... 0x1cf: /* bswap reg */
6966        reg = (b & 7) | REX_B(s);
6967#ifdef TARGET_X86_64
6968        if (dflag == MO_64) {
6969            gen_op_mov_v_reg(MO_64, cpu_T[0], reg);
6970            tcg_gen_bswap64_i64(cpu_T[0], cpu_T[0]);
6971            gen_op_mov_reg_v(MO_64, reg, cpu_T[0]);
6972        } else
6973#endif
6974        {
6975            gen_op_mov_v_reg(MO_32, cpu_T[0], reg);
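                /* Zero-extend first so that on 64-bit targets the 32-bit
                   byteswap yields a zero-extended result. */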
6976            tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
6977            tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]);
6978            gen_op_mov_reg_v(MO_32, reg, cpu_T[0]);
6979        }
6980        break;
6981    case 0xd6: /* salc */
6982        if (CODE64(s))
6983            goto illegal_op;
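            /* Undocumented: AL = CF ? 0xff : 0x00, computed as -(carry). */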
6984        gen_compute_eflags_c(s, cpu_T[0]);
6985        tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
6986        gen_op_mov_reg_v(MO_8, R_EAX, cpu_T[0]);
6987        break;
6988    case 0xe0: /* loopnz */
6989    case 0xe1: /* loopz */
6990    case 0xe2: /* loop */
6991    case 0xe3: /* jecxz */
6992        {
6993            int l1, l2, l3;
6994
6995            tval = (int8_t)insn_get(env, s, MO_8);
6996            next_eip = s->pc - s->cs_base;
6997            tval += next_eip;
6998            if (dflag == MO_16) {
6999                tval &= 0xffff;
7000            }
7001
7002            l1 = gen_new_label();
7003            l2 = gen_new_label();
7004            l3 = gen_new_label();
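                /* l1 = branch taken (EIP := tval), l3 = branch not taken
                   (EIP := next_eip), l2 = common exit ending the TB. */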
7005            b &= 3;
7006            switch(b) {
7007            case 0: /* loopnz */
7008            case 1: /* loopz */
7009                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7010                gen_op_jz_ecx(s->aflag, l3);
7011                gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7012                break;
7013            case 2: /* loop */
7014                gen_op_add_reg_im(s->aflag, R_ECX, -1);
7015                gen_op_jnz_ecx(s->aflag, l1);
7016                break;
7017            default:
7018            case 3: /* jcxz */
7019                gen_op_jz_ecx(s->aflag, l1);
7020                break;
7021            }
7022
7023            gen_set_label(l3);
7024            gen_jmp_im(next_eip);
7025            tcg_gen_br(l2);
7026
7027            gen_set_label(l1);
7028            gen_jmp_im(tval);
7029            gen_set_label(l2);
7030            gen_eob(s);
7031        }
7032        break;
7033    case 0x130: /* wrmsr */
7034    case 0x132: /* rdmsr */
7035        if (s->cpl != 0) {
7036            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7037        } else {
7038            gen_update_cc_op(s);
7039            gen_jmp_im(pc_start - s->cs_base);
7040            if (b & 2) {
7041                gen_helper_rdmsr(cpu_env);
7042            } else {
7043                gen_helper_wrmsr(cpu_env);
7044            }
7045        }
7046        break;
7047    case 0x131: /* rdtsc */
7048        gen_update_cc_op(s);
7049        gen_jmp_im(pc_start - s->cs_base);
7050        if (use_icount)
7051            gen_io_start();
7052        gen_helper_rdtsc(cpu_env);
7053        if (use_icount) {
7054            gen_io_end();
7055            gen_jmp(s, s->pc - s->cs_base);
7056        }
7057        break;
7058    case 0x133: /* rdpmc */
7059        gen_update_cc_op(s);
7060        gen_jmp_im(pc_start - s->cs_base);
7061        gen_helper_rdpmc(cpu_env);
7062        break;
7063    case 0x134: /* sysenter */
7064        /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
7065        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7066            goto illegal_op;
7067        if (!s->pe) {
7068            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7069        } else {
7070            gen_update_cc_op(s);
7071            gen_jmp_im(pc_start - s->cs_base);
7072            gen_helper_sysenter(cpu_env);
7073            gen_eob(s);
7074        }
7075        break;
7076    case 0x135: /* sysexit */
7077        /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
7078        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7079            goto illegal_op;
7080        if (!s->pe) {
7081            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7082        } else {
7083            gen_update_cc_op(s);
7084            gen_jmp_im(pc_start - s->cs_base);
7085            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7086            gen_eob(s);
7087        }
7088        break;
7089#ifdef TARGET_X86_64
7090    case 0x105: /* syscall */
7091        /* XXX: is it usable in real mode? */
7092        gen_update_cc_op(s);
7093        gen_jmp_im(pc_start - s->cs_base);
7094        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7095        gen_eob(s);
7096        break;
7097    case 0x107: /* sysret */
7098        if (!s->pe) {
7099            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7100        } else {
7101            gen_update_cc_op(s);
7102            gen_jmp_im(pc_start - s->cs_base);
7103            gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7104            /* condition codes are modified only in long mode */
7105            if (s->lma) {
7106                set_cc_op(s, CC_OP_EFLAGS);
7107            }
7108            gen_eob(s);
7109        }
7110        break;
7111#endif
7112    case 0x1a2: /* cpuid */
7113        gen_update_cc_op(s);
7114        gen_jmp_im(pc_start - s->cs_base);
7115        gen_helper_cpuid(cpu_env);
7116        break;
7117    case 0xf4: /* hlt */
7118        if (s->cpl != 0) {
7119            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7120        } else {
7121            gen_update_cc_op(s);
7122            gen_jmp_im(pc_start - s->cs_base);
7123            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7124            s->is_jmp = DISAS_TB_JUMP;
7125        }
7126        break;
7127    case 0x100:
7128        modrm = cpu_ldub_code(env, s->pc++);
7129        mod = (modrm >> 6) & 3;
7130        op = (modrm >> 3) & 7;
7131        switch(op) {
7132        case 0: /* sldt */
7133            if (!s->pe || s->vm86)
7134                goto illegal_op;
7135            gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7136            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
7137            ot = mod == 3 ? dflag : MO_16;
7138            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7139            break;
7140        case 2: /* lldt */
7141            if (!s->pe || s->vm86)
7142                goto illegal_op;
7143            if (s->cpl != 0) {
7144                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7145            } else {
7146                gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7147                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7148                gen_jmp_im(pc_start - s->cs_base);
7149                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
7150                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
7151            }
7152            break;
7153        case 1: /* str */
7154            if (!s->pe || s->vm86)
7155                goto illegal_op;
7156            gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7157            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
7158            ot = mod == 3 ? dflag : MO_16;
7159            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7160            break;
7161        case 3: /* ltr */
7162            if (!s->pe || s->vm86)
7163                goto illegal_op;
7164            if (s->cpl != 0) {
7165                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7166            } else {
7167                gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7168                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7169                gen_jmp_im(pc_start - s->cs_base);
7170                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
7171                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
7172            }
7173            break;
7174        case 4: /* verr */
7175        case 5: /* verw */
7176            if (!s->pe || s->vm86)
7177                goto illegal_op;
7178            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7179            gen_update_cc_op(s);
7180            if (op == 4) {
7181                gen_helper_verr(cpu_env, cpu_T[0]);
7182            } else {
7183                gen_helper_verw(cpu_env, cpu_T[0]);
7184            }
7185            set_cc_op(s, CC_OP_EFLAGS);
7186            break;
7187        default:
7188            goto illegal_op;
7189        }
7190        break;
7191    case 0x101:
7192        modrm = cpu_ldub_code(env, s->pc++);
7193        mod = (modrm >> 6) & 3;
7194        op = (modrm >> 3) & 7;
7195        rm = modrm & 7;
7196        switch(op) {
7197        case 0: /* sgdt */
7198            if (mod == 3)
7199                goto illegal_op;
7200            gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7201            gen_lea_modrm(env, s, modrm);
7202            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
7203            gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
7204            gen_add_A0_im(s, 2);
7205            tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.base));
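                /* With a 16-bit operand size only 24 bits of the base are
                   stored. */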
7206            if (dflag == MO_16) {
7207                tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
7208            }
7209            gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
7210            break;
7211        case 1:
7212            if (mod == 3) {
7213                switch (rm) {
7214                case 0: /* monitor */
7215                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
7216                        s->cpl != 0)
7217                        goto illegal_op;
7218                    gen_update_cc_op(s);
7219                    gen_jmp_im(pc_start - s->cs_base);
7220                    tcg_gen_mov_tl(cpu_A0, cpu_regs[R_EAX]);
7221                    gen_extu(s->aflag, cpu_A0);
7222                    gen_add_A0_ds_seg(s);
7223                    gen_helper_monitor(cpu_env, cpu_A0);
7224                    break;
7225                case 1: /* mwait */
7226                    if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
7227                        s->cpl != 0)
7228                        goto illegal_op;
7229                    gen_update_cc_op(s);
7230                    gen_jmp_im(pc_start - s->cs_base);
7231                    gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7232                    gen_eob(s);
7233                    break;
7234                case 2: /* clac */
7235                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
7236                        s->cpl != 0) {
7237                        goto illegal_op;
7238                    }
7239                    gen_helper_clac(cpu_env);
7240                    gen_jmp_im(s->pc - s->cs_base);
7241                    gen_eob(s);
7242                    break;
7243                case 3: /* stac */
7244                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
7245                        s->cpl != 0) {
7246                        goto illegal_op;
7247                    }
7248                    gen_helper_stac(cpu_env);
7249                    gen_jmp_im(s->pc - s->cs_base);
7250                    gen_eob(s);
7251                    break;
7252                default:
7253                    goto illegal_op;
7254                }
7255            } else { /* sidt */
7256                gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7257                gen_lea_modrm(env, s, modrm);
7258                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
7259                gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
7260                gen_add_A0_im(s, 2);
7261                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.base));
7262                if (dflag == MO_16) {
7263                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
7264                }
7265                gen_op_st_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
7266            }
7267            break;
7268        case 2: /* lgdt */
7269        case 3: /* lidt */
7270            if (mod == 3) {
7271                gen_update_cc_op(s);
7272                gen_jmp_im(pc_start - s->cs_base);
7273                switch(rm) {
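                    /* The SVM helpers take s->aflag - 1 (0/1/2 for
                       16/32/64-bit addressing), used to mask the rAX-based
                       address. */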
7274                case 0: /* VMRUN */
7275                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7276                        goto illegal_op;
7277                    if (s->cpl != 0) {
7278                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7279                        break;
7280                    } else {
7281                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7282                                         tcg_const_i32(s->pc - pc_start));
7283                        tcg_gen_exit_tb(0);
7284                        s->is_jmp = DISAS_TB_JUMP;
7285                    }
7286                    break;
7287                case 1: /* VMMCALL */
7288                    if (!(s->flags & HF_SVME_MASK))
7289                        goto illegal_op;
7290                    gen_helper_vmmcall(cpu_env);
7291                    break;
7292                case 2: /* VMLOAD */
7293                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7294                        goto illegal_op;
7295                    if (s->cpl != 0) {
7296                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7297                        break;
7298                    } else {
7299                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7300                    }
7301                    break;
7302                case 3: /* VMSAVE */
7303                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7304                        goto illegal_op;
7305                    if (s->cpl != 0) {
7306                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7307                        break;
7308                    } else {
7309                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7310                    }
7311                    break;
7312                case 4: /* STGI */
7313                    if ((!(s->flags & HF_SVME_MASK) &&
7314                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
7315                        !s->pe)
7316                        goto illegal_op;
7317                    if (s->cpl != 0) {
7318                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7319                        break;
7320                    } else {
7321                        gen_helper_stgi(cpu_env);
7322                    }
7323                    break;
7324                case 5: /* CLGI */
7325                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7326                        goto illegal_op;
7327                    if (s->cpl != 0) {
7328                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7329                        break;
7330                    } else {
7331                        gen_helper_clgi(cpu_env);
7332                    }
7333                    break;
7334                case 6: /* SKINIT */
7335                    if ((!(s->flags & HF_SVME_MASK) && 
7336                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
7337                        !s->pe)
7338                        goto illegal_op;
7339                    gen_helper_skinit(cpu_env);
7340                    break;
7341                case 7: /* INVLPGA */
7342                    if (!(s->flags & HF_SVME_MASK) || !s->pe)
7343                        goto illegal_op;
7344                    if (s->cpl != 0) {
7345                        gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7346                        break;
7347                    } else {
7348                        gen_helper_invlpga(cpu_env,
7349                                           tcg_const_i32(s->aflag - 1));
7350                    }
7351                    break;
7352                default:
7353                    goto illegal_op;
7354                }
7355            } else if (s->cpl != 0) {
7356                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7357            } else {
7358                gen_svm_check_intercept(s, pc_start,
7359                                        op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
7360                gen_lea_modrm(env, s, modrm);
7361                gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
7362                gen_add_A0_im(s, 2);
7363                gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
7364                if (dflag == MO_16) {
7365                    tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffffff);
7366                }
7367                if (op == 2) {
7368                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,gdt.base));
7369                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,gdt.limit));
7370                } else {
7371                    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,idt.base));
7372                    tcg_gen_st32_tl(cpu_T[1], cpu_env, offsetof(CPUX86State,idt.limit));
7373                }
7374            }
7375            break;
7376        case 4: /* smsw */
7377            gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
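            /* Read the low 32 bits of CR0; on big-endian hosts the low half
               of the 64-bit field sits at offset +4. */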
7378#if defined TARGET_X86_64 && defined HOST_WORDS_BIGENDIAN
7379            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]) + 4);
7380#else
7381            tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
7382#endif
7383            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
7384            break;
7385        case 6: /* lmsw */
7386            if (s->cpl != 0) {
7387                gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7388            } else {
7389                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7390                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7391                gen_helper_lmsw(cpu_env, cpu_T[0]);
7392                gen_jmp_im(s->pc - s->cs_base);
7393                gen_eob(s);
7394            }
7395            break;
7396        case 7:
7397            if (mod != 3) { /* invlpg */
7398                if (s->cpl != 0) {
7399                    gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7400                } else {
7401                    gen_update_cc_op(s);
7402                    gen_jmp_im(pc_start - s->cs_base);
7403                    gen_lea_modrm(env, s, modrm);
7404                    gen_helper_invlpg(cpu_env, cpu_A0);
7405                    gen_jmp_im(s->pc - s->cs_base);
7406                    gen_eob(s);
7407                }
7408            } else {
7409                switch (rm) {
7410                case 0: /* swapgs */
7411#ifdef TARGET_X86_64
7412                    if (CODE64(s)) {
7413                        if (s->cpl != 0) {
7414                            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7415                        } else {
7416                            tcg_gen_ld_tl(cpu_T[0], cpu_env,
7417                                offsetof(CPUX86State,segs[R_GS].base));
7418                            tcg_gen_ld_tl(cpu_T[1], cpu_env,
7419                                offsetof(CPUX86State,kernelgsbase));
7420                            tcg_gen_st_tl(cpu_T[1], cpu_env,
7421                                offsetof(CPUX86State,segs[R_GS].base));
7422                            tcg_gen_st_tl(cpu_T[0], cpu_env,
7423                                offsetof(CPUX86State,kernelgsbase));
7424                        }
7425                    } else
7426#endif
7427                    {
7428                        goto illegal_op;
7429                    }
7430                    break;
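                /* What the four TCG ops above implement, written as
                 * plain C (illustrative sketch only):
                 *
                 *     tmp                  = env->segs[R_GS].base;
                 *     env->segs[R_GS].base = env->kernelgsbase;
                 *     env->kernelgsbase    = tmp;
                 *
                 * i.e. SWAPGS exchanges the current GS base with the
                 * kernel GS base MSR, with no flags touched and no
                 * faults beyond the CPL check.
                 */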
7431                case 1: /* rdtscp */
7432                    if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
7433                        goto illegal_op;
7434                    gen_update_cc_op(s);
7435                    gen_jmp_im(pc_start - s->cs_base);
7436                    if (use_icount)
7437                        gen_io_start();
7438                    gen_helper_rdtscp(cpu_env);
7439                    if (use_icount) {
7440                        gen_io_end();
7441                        gen_jmp(s, s->pc - s->cs_base);
7442                    }
7443                    break;
7444                default:
7445                    goto illegal_op;
7446                }
7447            }
7448            break;
7449        default:
7450            goto illegal_op;
7451        }
7452        break;
7453    case 0x108: /* invd */
7454    case 0x109: /* wbinvd */
7455        if (s->cpl != 0) {
7456            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7457        } else {
7458            gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7459            /* nothing to do */
7460        }
7461        break;
7462    case 0x63: /* arpl or movsxd (x86_64) */
7463#ifdef TARGET_X86_64
7464        if (CODE64(s)) {
7465            int d_ot;
7466            /* d_ot is the size of destination */
7467            d_ot = dflag;
7468
7469            modrm = cpu_ldub_code(env, s->pc++);
7470            reg = ((modrm >> 3) & 7) | rex_r;
7471            mod = (modrm >> 6) & 3;
7472            rm = (modrm & 7) | REX_B(s);
7473
7474            if (mod == 3) {
7475                gen_op_mov_v_reg(MO_32, cpu_T[0], rm);
7476                /* sign extend */
7477                if (d_ot == MO_64) {
7478                    tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
7479                }
7480                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
7481            } else {
7482                gen_lea_modrm(env, s, modrm);
7483                gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
7484                gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
7485            }
7486        } else
7487#endif
7488        {
7489            int label1;
7490            TCGv t0, t1, t2, a0;
7491
7492            if (!s->pe || s->vm86)
7493                goto illegal_op;
7494            t0 = tcg_temp_local_new();
7495            t1 = tcg_temp_local_new();
7496            t2 = tcg_temp_local_new();
7497            ot = MO_16;
7498            modrm = cpu_ldub_code(env, s->pc++);
7499            reg = (modrm >> 3) & 7;
7500            mod = (modrm >> 6) & 3;
7501            rm = modrm & 7;
7502            if (mod != 3) {
7503                gen_lea_modrm(env, s, modrm);
7504                gen_op_ld_v(s, ot, t0, cpu_A0);
7505                a0 = tcg_temp_local_new();
7506                tcg_gen_mov_tl(a0, cpu_A0);
7507            } else {
7508                gen_op_mov_v_reg(ot, t0, rm);
7509                TCGV_UNUSED(a0);
7510            }
7511            gen_op_mov_v_reg(ot, t1, reg);
7512            tcg_gen_andi_tl(cpu_tmp0, t0, 3);
7513            tcg_gen_andi_tl(t1, t1, 3);
7514            tcg_gen_movi_tl(t2, 0);
7515            label1 = gen_new_label();
7516            tcg_gen_brcond_tl(TCG_COND_GE, cpu_tmp0, t1, label1);
7517            tcg_gen_andi_tl(t0, t0, ~3);
7518            tcg_gen_or_tl(t0, t0, t1);
7519            tcg_gen_movi_tl(t2, CC_Z);
7520            gen_set_label(label1);
7521            if (mod != 3) {
7522                gen_op_st_v(s, ot, t0, a0);
7523                tcg_temp_free(a0);
7524            } else {
7525                gen_op_mov_reg_v(ot, rm, t0);
7526            }
7527            gen_compute_eflags(s);
7528            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7529            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7530            tcg_temp_free(t0);
7531            tcg_temp_free(t1);
7532            tcg_temp_free(t2);
7533        }
7534        break;
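    /* Worked example for the ARPL path above (illustrative, not part of
     * the original code): with destination selector 0x0008 (RPL 0) and
     * source selector 0x000b (RPL 3), the TCG_COND_GE branch falls
     * through, t0 becomes (0x0008 & ~3) | 3 = 0x000b and t2 = CC_Z, so
     * ZF ends up set.  With the operands swapped, RPL 3 >= RPL 0 takes
     * the branch: the selector is stored back unchanged and ZF is
     * cleared.  */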
7535    case 0x102: /* lar */
7536    case 0x103: /* lsl */
7537        {
7538            int label1;
7539            TCGv t0;
7540            if (!s->pe || s->vm86)
7541                goto illegal_op;
7542            ot = dflag != MO_16 ? MO_32 : MO_16;
7543            modrm = cpu_ldub_code(env, s->pc++);
7544            reg = ((modrm >> 3) & 7) | rex_r;
7545            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7546            t0 = tcg_temp_local_new();
7547            gen_update_cc_op(s);
7548            if (b == 0x102) {
7549                gen_helper_lar(t0, cpu_env, cpu_T[0]);
7550            } else {
7551                gen_helper_lsl(t0, cpu_env, cpu_T[0]);
7552            }
7553            tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
7554            label1 = gen_new_label();
7555            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
7556            gen_op_mov_reg_v(ot, reg, t0);
7557            gen_set_label(label1);
7558            set_cc_op(s, CC_OP_EFLAGS);
7559            tcg_temp_free(t0);
7560        }
7561        break;
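    /* Note on the conditional store above (a sketch of the helper
     * contract assumed here): gen_helper_lar()/gen_helper_lsl() leave
     * their success bit in CC_Z of cpu_cc_src, so the brcondi skips the
     * register write exactly when the selector was invalid, matching
     * the architectural rule that LAR/LSL leave the destination
     * unchanged and clear ZF on failure.  */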
7562    case 0x118:
7563        modrm = cpu_ldub_code(env, s->pc++);
7564        mod = (modrm >> 6) & 3;
7565        op = (modrm >> 3) & 7;
7566        switch(op) {
7567        case 0: /* prefetchnta */
7568        case 1: /* prefetcht0 */
7569        case 2: /* prefetcht1 */
7570        case 3: /* prefetcht2 */
7571            if (mod == 3)
7572                goto illegal_op;
7573            gen_lea_modrm(env, s, modrm);
7574            /* nothing more to do */
7575            break;
7576        default: /* nop (multi byte) */
7577            gen_nop_modrm(env, s, modrm);
7578            break;
7579        }
7580        break;
7581    case 0x119 ... 0x11f: /* nop (multi byte) */
7582        modrm = cpu_ldub_code(env, s->pc++);
7583        gen_nop_modrm(env, s, modrm);
7584        break;
7585    case 0x120: /* mov reg, crN */
7586    case 0x122: /* mov crN, reg */
7587        if (s->cpl != 0) {
7588            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7589        } else {
7590            modrm = cpu_ldub_code(env, s->pc++);
7591            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7592             * AMD documentation (24594.pdf) and testing of
7593             * Intel 386 and 486 processors all show that the mod bits
7594             * are assumed to be 1's, regardless of actual values.
7595             */
7596            rm = (modrm & 7) | REX_B(s);
7597            reg = ((modrm >> 3) & 7) | rex_r;
7598            if (CODE64(s))
7599                ot = MO_64;
7600            else
7601                ot = MO_32;
7602            if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
7603                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
7604                reg = 8;
7605            }
7606            switch(reg) {
7607            case 0:
7608            case 2:
7609            case 3:
7610            case 4:
7611            case 8:
7612                gen_update_cc_op(s);
7613                gen_jmp_im(pc_start - s->cs_base);
7614                if (b & 2) {
7615                    gen_op_mov_v_reg(ot, cpu_T[0], rm);
7616                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
7617                                         cpu_T[0]);
7618                    gen_jmp_im(s->pc - s->cs_base);
7619                    gen_eob(s);
7620                } else {
7621                    gen_helper_read_crN(cpu_T[0], cpu_env, tcg_const_i32(reg));
7622                    gen_op_mov_reg_v(ot, rm, cpu_T[0]);
7623                }
7624                break;
7625            default:
7626                goto illegal_op;
7627            }
7628        }
7629        break;
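    /* Example of the CR8LEG remapping above (illustrative): "lock mov
     * eax, cr0" encodes as F0 0F 20 C0, i.e. reg == 0 with PREFIX_LOCK
     * set.  On CPUs advertising CPUID_EXT3_CR8LEG it is decoded as a
     * read of CR8 (the task-priority register) rather than raising #UD
     * for a LOCK on a non-lockable instruction, which is how AMD
     * exposed CR8 to 32-bit code.  */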
7630    case 0x121: /* mov reg, drN */
7631    case 0x123: /* mov drN, reg */
7632        if (s->cpl != 0) {
7633            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7634        } else {
7635            modrm = cpu_ldub_code(env, s->pc++);
7636            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
7637             * AMD documentation (24594.pdf) and testing of
7638             * Intel 386 and 486 processors all show that the mod bits
7639             * are assumed to be 1's, regardless of actual values.
7640             */
7641            rm = (modrm & 7) | REX_B(s);
7642            reg = ((modrm >> 3) & 7) | rex_r;
7643            if (CODE64(s))
7644                ot = MO_64;
7645            else
7646                ot = MO_32;
7647            /* XXX: do it dynamically with CR4.DE bit */
7648            if (reg == 4 || reg == 5 || reg >= 8)
7649                goto illegal_op;
7650            if (b & 2) {
7651                gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
7652                gen_op_mov_v_reg(ot, cpu_T[0], rm);
7653                gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
7654                gen_jmp_im(s->pc - s->cs_base);
7655                gen_eob(s);
7656            } else {
7657                gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
7658                tcg_gen_ld_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,dr[reg]));
7659                gen_op_mov_reg_v(ot, rm, cpu_T[0]);
7660            }
7661        }
7662        break;
7663    case 0x106: /* clts */
7664        if (s->cpl != 0) {
7665            gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7666        } else {
7667            gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7668            gen_helper_clts(cpu_env);
7669            /* abort block because static cpu state changed */
7670            gen_jmp_im(s->pc - s->cs_base);
7671            gen_eob(s);
7672        }
7673        break;
7674    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
7675    case 0x1c3: /* MOVNTI reg, mem */
7676        if (!(s->cpuid_features & CPUID_SSE2))
7677            goto illegal_op;
7678        ot = mo_64_32(dflag);
7679        modrm = cpu_ldub_code(env, s->pc++);
7680        mod = (modrm >> 6) & 3;
7681        if (mod == 3)
7682            goto illegal_op;
7683        reg = ((modrm >> 3) & 7) | rex_r;
7684        /* generate a generic store */
7685        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
7686        break;
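    /* MOVNTI note (illustrative): the non-temporal hint only affects
     * cache allocation, which QEMU does not model, so translating it as
     * the generic store above is architecturally sufficient.  */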
7687    case 0x1ae:
7688        modrm = cpu_ldub_code(env, s->pc++);
7689        mod = (modrm >> 6) & 3;
7690        op = (modrm >> 3) & 7;
7691        switch(op) {
7692        case 0: /* fxsave */
7693            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
7694                (s->prefix & PREFIX_LOCK))
7695                goto illegal_op;
7696            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7697                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7698                break;
7699            }
7700            gen_lea_modrm(env, s, modrm);
7701            gen_update_cc_op(s);
7702            gen_jmp_im(pc_start - s->cs_base);
7703            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
7704            break;
7705        case 1: /* fxrstor */
7706            if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
7707                (s->prefix & PREFIX_LOCK))
7708                goto illegal_op;
7709            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
7710                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7711                break;
7712            }
7713            gen_lea_modrm(env, s, modrm);
7714            gen_update_cc_op(s);
7715            gen_jmp_im(pc_start - s->cs_base);
7716            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
7717            break;
7718        case 2: /* ldmxcsr */
7719        case 3: /* stmxcsr */
7720            if (s->flags & HF_TS_MASK) {
7721                gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7722                break;
7723            }
7724            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
7725                mod == 3)
7726                goto illegal_op;
7727            gen_lea_modrm(env, s, modrm);
7728            if (op == 2) {
7729                tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
7730                                    s->mem_index, MO_LEUL);
7731                gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
7732            } else {
7733                tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
7734                gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
7735            }
7736            break;
7737        case 5: /* lfence */
7738        case 6: /* mfence */
7739            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
7740                goto illegal_op;
7741            break;
7742        case 7: /* sfence / clflush */
7743            if ((modrm & 0xc7) == 0xc0) {
7744                /* sfence */
7745                /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */
7746                if (!(s->cpuid_features & CPUID_SSE))
7747                    goto illegal_op;
7748            } else {
7749                /* clflush */
7750                if (!(s->cpuid_features & CPUID_CLFLUSH))
7751                    goto illegal_op;
7752                gen_lea_modrm(env, s, modrm);
7753            }
7754            break;
7755        default:
7756            goto illegal_op;
7757        }
7758        break;
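    /* Decoding note for the 0F AE /7 case above (illustrative): SFENCE
     * is specifically the modrm byte 0xf8 (mod == 3, reg == 7, rm == 0),
     * hence the (modrm & 0xc7) == 0xc0 test; the memory forms of /7 are
     * CLFLUSH.  */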
7759    case 0x10d: /* 3DNow! prefetch(w) */
7760        modrm = cpu_ldub_code(env, s->pc++);
7761        mod = (modrm >> 6) & 3;
7762        if (mod == 3)
7763            goto illegal_op;
7764        gen_lea_modrm(env, s, modrm);
7765        /* ignore for now */
7766        break;
7767    case 0x1aa: /* rsm */
7768        gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
7769        if (!(s->flags & HF_SMM_MASK))
7770            goto illegal_op;
7771        gen_update_cc_op(s);
7772        gen_jmp_im(s->pc - s->cs_base);
7773        gen_helper_rsm(cpu_env);
7774        gen_eob(s);
7775        break;
7776    case 0x1b8: /* SSE4.2 popcnt */
7777        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
7778             PREFIX_REPZ)
7779            goto illegal_op;
7780        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
7781            goto illegal_op;
7782
7783        modrm = cpu_ldub_code(env, s->pc++);
7784        reg = ((modrm >> 3) & 7) | rex_r;
7785
7786        if (s->prefix & PREFIX_DATA) {
7787            ot = MO_16;
7788        } else {
7789            ot = mo_64_32(dflag);
7790        }
7791
7792        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7793        gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
7794        gen_op_mov_reg_v(ot, reg, cpu_T[0]);
7795
7796        set_cc_op(s, CC_OP_EFLAGS);
7797        break;
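    /* POPCNT sketch (illustrative): "popcnt ax, bx" encodes as
     * 66 F3 0F B8 C3 and takes the ot == MO_16 path above; a source of
     * 0x00f3 (six bits set) writes 6 to the destination, and the
     * CC_OP_EFLAGS state leaves ZF set only when the source was
     * zero.  */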
7798    case 0x10e ... 0x10f:
7799        /* 3DNow! instructions, ignore prefixes */
7800        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
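        /* fall through to the common SSE decoder below */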
7801    case 0x110 ... 0x117:
7802    case 0x128 ... 0x12f:
7803    case 0x138 ... 0x13a:
7804    case 0x150 ... 0x179:
7805    case 0x17c ... 0x17f:
7806    case 0x1c2:
7807    case 0x1c4 ... 0x1c6:
7808    case 0x1d0 ... 0x1fe:
7809        gen_sse(env, s, b, pc_start, rex_r);
7810        break;
7811    default:
7812        goto illegal_op;
7813    }
7814    /* lock generation */
7815    if (s->prefix & PREFIX_LOCK)
7816        gen_helper_unlock();
7817    return s->pc;
7818 illegal_op:
7819    if (s->prefix & PREFIX_LOCK)
7820        gen_helper_unlock();
7821    /* XXX: ensure that no lock was generated */
7822    gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
7823    return s->pc;
7824}
7825
7826void optimize_flags_init(void)
7827{
7828    static const char reg_names[CPU_NB_REGS][4] = {
7829#ifdef TARGET_X86_64
7830        [R_EAX] = "rax",
7831        [R_EBX] = "rbx",
7832        [R_ECX] = "rcx",
7833        [R_EDX] = "rdx",
7834        [R_ESI] = "rsi",
7835        [R_EDI] = "rdi",
7836        [R_EBP] = "rbp",
7837        [R_ESP] = "rsp",
7838        [8]  = "r8",
7839        [9]  = "r9",
7840        [10] = "r10",
7841        [11] = "r11",
7842        [12] = "r12",
7843        [13] = "r13",
7844        [14] = "r14",
7845        [15] = "r15",
7846#else
7847        [R_EAX] = "eax",
7848        [R_EBX] = "ebx",
7849        [R_ECX] = "ecx",
7850        [R_EDX] = "edx",
7851        [R_ESI] = "esi",
7852        [R_EDI] = "edi",
7853        [R_EBP] = "ebp",
7854        [R_ESP] = "esp",
7855#endif
7856    };
7857    int i;
7858
7859    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
7860    cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
7861                                       offsetof(CPUX86State, cc_op), "cc_op");
7862    cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
7863                                    "cc_dst");
7864    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
7865                                    "cc_src");
7866    cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
7867                                     "cc_src2");
7868
7869    for (i = 0; i < CPU_NB_REGS; ++i) {
7870        cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
7871                                         offsetof(CPUX86State, regs[i]),
7872                                         reg_names[i]);
7873    }
7874}
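
/* Once the globals above are registered, generated code can name guest
 * state directly in TCG ops.  A minimal hypothetical use (not present
 * in this file):
 *
 *     tcg_gen_addi_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], 1);
 *
 * which TCG spills back to env->regs[R_EAX] as needed.
 */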
7875
7876/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
7877   basic block 'tb'. If search_pc is TRUE, also generate PC
7878   information for each intermediate instruction. */
7879static inline void gen_intermediate_code_internal(X86CPU *cpu,
7880                                                  TranslationBlock *tb,
7881                                                  bool search_pc)
7882{
7883    CPUState *cs = CPU(cpu);
7884    CPUX86State *env = &cpu->env;
7885    DisasContext dc1, *dc = &dc1;
7886    target_ulong pc_ptr;
7887    uint16_t *gen_opc_end;
7888    CPUBreakpoint *bp;
7889    int j, lj;
7890    uint64_t flags;
7891    target_ulong pc_start;
7892    target_ulong cs_base;
7893    int num_insns;
7894    int max_insns;
7895
7896    /* generate intermediate code */
7897    pc_start = tb->pc;
7898    cs_base = tb->cs_base;
7899    flags = tb->flags;
7900
7901    dc->pe = (flags >> HF_PE_SHIFT) & 1;
7902    dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
7903    dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
7904    dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
7905    dc->f_st = 0;
7906    dc->vm86 = (flags >> VM_SHIFT) & 1;
7907    dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
7908    dc->iopl = (flags >> IOPL_SHIFT) & 3;
7909    dc->tf = (flags >> TF_SHIFT) & 1;
7910    dc->singlestep_enabled = cs->singlestep_enabled;
7911    dc->cc_op = CC_OP_DYNAMIC;
7912    dc->cc_op_dirty = false;
7913    dc->cs_base = cs_base;
7914    dc->tb = tb;
7915    dc->popl_esp_hack = 0;
7916    /* select memory access functions */
7917    dc->mem_index = 0;
7918    if (flags & HF_SOFTMMU_MASK) {
7919        dc->mem_index = cpu_mmu_index(env);
7920    }
7921    dc->cpuid_features = env->features[FEAT_1_EDX];
7922    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
7923    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
7924    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
7925    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
7926#ifdef TARGET_X86_64
7927    dc->lma = (flags >> HF_LMA_SHIFT) & 1;
7928    dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
7929#endif
7930    dc->flags = flags;
7931    dc->jmp_opt = !(dc->tf || cs->singlestep_enabled ||
7932                    (flags & HF_INHIBIT_IRQ_MASK)
7933#ifndef CONFIG_SOFTMMU
7934                    || (flags & HF_SOFTMMU_MASK)
7935#endif
7936                    );
7937#if 0
7938    /* check addseg logic */
7939    if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
7940        printf("ERROR addseg\n");
7941#endif
7942
7943    cpu_T[0] = tcg_temp_new();
7944    cpu_T[1] = tcg_temp_new();
7945    cpu_A0 = tcg_temp_new();
7946
7947    cpu_tmp0 = tcg_temp_new();
7948    cpu_tmp1_i64 = tcg_temp_new_i64();
7949    cpu_tmp2_i32 = tcg_temp_new_i32();
7950    cpu_tmp3_i32 = tcg_temp_new_i32();
7951    cpu_tmp4 = tcg_temp_new();
7952    cpu_ptr0 = tcg_temp_new_ptr();
7953    cpu_ptr1 = tcg_temp_new_ptr();
7954    cpu_cc_srcT = tcg_temp_local_new();
7955
7956    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
7957
7958    dc->is_jmp = DISAS_NEXT;
7959    pc_ptr = pc_start;
7960    lj = -1;
7961    num_insns = 0;
7962    max_insns = tb->cflags & CF_COUNT_MASK;
7963    if (max_insns == 0)
7964        max_insns = CF_COUNT_MASK;
7965
7966    gen_tb_start();
7967    for(;;) {
7968        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
7969            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
7970                if (bp->pc == pc_ptr &&
7971                    !((bp->flags & BP_CPU) && (tb->flags & HF_RF_MASK))) {
7972                    gen_debug(dc, pc_ptr - dc->cs_base);
7973                    break;
7974                }
7975            }
7976        }
7977        if (search_pc) {
7978            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
7979            if (lj < j) {
7980                lj++;
7981                while (lj < j)
7982                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
7983            }
7984            tcg_ctx.gen_opc_pc[lj] = pc_ptr;
7985            gen_opc_cc_op[lj] = dc->cc_op;
7986            tcg_ctx.gen_opc_instr_start[lj] = 1;
7987            tcg_ctx.gen_opc_icount[lj] = num_insns;
7988        }
7989        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
7990            gen_io_start();
7991
7992        pc_ptr = disas_insn(env, dc, pc_ptr);
7993        num_insns++;
7994        /* stop translation if indicated */
7995        if (dc->is_jmp)
7996            break;
7997        /* In single-step mode, we generate only one instruction and
7998           raise an exception. */
7999        /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8000           the flag and abort the translation to give the IRQs a
8001           chance to happen. */
8002        if (dc->tf || dc->singlestep_enabled ||
8003            (flags & HF_INHIBIT_IRQ_MASK)) {
8004            gen_jmp_im(pc_ptr - dc->cs_base);
8005            gen_eob(dc);
8006            break;
8007        }
8008        /* if too long translation, stop generation too */
8009        if (tcg_ctx.gen_opc_ptr >= gen_opc_end ||
8010            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
8011            num_insns >= max_insns) {
8012            gen_jmp_im(pc_ptr - dc->cs_base);
8013            gen_eob(dc);
8014            break;
8015        }
8016        if (singlestep) {
8017            gen_jmp_im(pc_ptr - dc->cs_base);
8018            gen_eob(dc);
8019            break;
8020        }
8021    }
8022    if (tb->cflags & CF_LAST_IO)
8023        gen_io_end();
8024    gen_tb_end(tb, num_insns);
8025    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
8026    /* make sure we fill in the last values */
8027    if (search_pc) {
8028        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
8029        lj++;
8030        while (lj <= j)
8031            tcg_ctx.gen_opc_instr_start[lj++] = 0;
8032    }
8033
8034#ifdef DEBUG_DISAS
8035    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
8036        int disas_flags;
8037        qemu_log("----------------\n");
8038        qemu_log("IN: %s\n", lookup_symbol(pc_start));
8039#ifdef TARGET_X86_64
8040        if (dc->code64)
8041            disas_flags = 2;
8042        else
8043#endif
8044            disas_flags = !dc->code32;
8045        log_target_disas(env, pc_start, pc_ptr - pc_start, disas_flags);
8046        qemu_log("\n");
8047    }
8048#endif
8049
8050    if (!search_pc) {
8051        tb->size = pc_ptr - pc_start;
8052        tb->icount = num_insns;
8053    }
8054}
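
/* A TB produced by the loop above therefore ends for one of four
 * reasons: the decoded instruction ended the block (dc->is_jmp), the
 * TF flag/single-stepping/inhibited IRQs forced an early exit, the
 * opcode buffer or icount budget ran out, or the block grew to within
 * 32 bytes of TARGET_PAGE_SIZE, bounding any TB to at most two guest
 * pages.  */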
8055
8056void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
8057{
8058    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, false);
8059}
8060
8061void gen_intermediate_code_pc(CPUX86State *env, TranslationBlock *tb)
8062{
8063    gen_intermediate_code_internal(x86_env_get_cpu(env), tb, true);
8064}
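
/* The two entry points above differ only in search_pc: the first is
 * used when a TB is translated for execution, while the second
 * retranslates it recording per-instruction state (gen_opc_pc[],
 * gen_opc_cc_op[]) so that restore_state_to_opc() below can map a
 * faulting opcode index back to a guest eip and cc_op value.  */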
8065
8066void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
8067{
8068    int cc_op;
8069#ifdef DEBUG_DISAS
8070    if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
8071        int i;
8072        qemu_log("RESTORE:\n");
8073        for (i = 0; i <= pc_pos; i++) {
8074            if (tcg_ctx.gen_opc_instr_start[i]) {
8075                qemu_log("0x%04x: " TARGET_FMT_lx "\n", i,
8076                        tcg_ctx.gen_opc_pc[i]);
8077            }
8078        }
8079        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
8080                pc_pos, tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base,
8081                (uint32_t)tb->cs_base);
8082    }
8083#endif
8084    env->eip = tcg_ctx.gen_opc_pc[pc_pos] - tb->cs_base;
8085    cc_op = gen_opc_cc_op[pc_pos];
8086    if (cc_op != CC_OP_DYNAMIC)
8087        env->cc_op = cc_op;
8088}
8089