linux/arch/x86/kvm/emulate.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/******************************************************************************
   3 * emulate.c
   4 *
   5 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
   6 *
   7 * Copyright (c) 2005 Keir Fraser
   8 *
   9 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
  10 * privileged instructions:
  11 *
  12 * Copyright (C) 2006 Qumranet
  13 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  14 *
  15 *   Avi Kivity <avi@qumranet.com>
  16 *   Yaniv Kamay <yaniv@qumranet.com>
  17 *
  18 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
  19 */
  20
  21#include <linux/kvm_host.h>
  22#include "kvm_cache_regs.h"
  23#include "kvm_emulate.h"
  24#include <linux/stringify.h>
  25#include <asm/debugreg.h>
  26#include <asm/nospec-branch.h>
  27
  28#include "x86.h"
  29#include "tss.h"
  30#include "mmu.h"
  31#include "pmu.h"
  32
  33/*
  34 * Operand types
  35 */
  36#define OpNone             0ull
  37#define OpImplicit         1ull  /* No generic decode */
  38#define OpReg              2ull  /* Register */
  39#define OpMem              3ull  /* Memory */
  40#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
  41#define OpDI               5ull  /* ES:DI/EDI/RDI */
  42#define OpMem64            6ull  /* Memory, 64-bit */
  43#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
  44#define OpDX               8ull  /* DX register */
  45#define OpCL               9ull  /* CL register (for shifts) */
  46#define OpImmByte         10ull  /* 8-bit sign extended immediate */
  47#define OpOne             11ull  /* Implied 1 */
  48#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
  49#define OpMem16           13ull  /* Memory operand (16-bit). */
  50#define OpMem32           14ull  /* Memory operand (32-bit). */
  51#define OpImmU            15ull  /* Immediate operand, zero extended */
  52#define OpSI              16ull  /* SI/ESI/RSI */
  53#define OpImmFAddr        17ull  /* Immediate far address */
  54#define OpMemFAddr        18ull  /* Far address in memory */
  55#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
  56#define OpES              20ull  /* ES */
  57#define OpCS              21ull  /* CS */
  58#define OpSS              22ull  /* SS */
  59#define OpDS              23ull  /* DS */
  60#define OpFS              24ull  /* FS */
  61#define OpGS              25ull  /* GS */
  62#define OpMem8            26ull  /* 8-bit zero extended memory operand */
  63#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
  64#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
  65#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
  66#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */
  67
  68#define OpBits             5  /* Width of operand field */
  69#define OpMask             ((1ull << OpBits) - 1)
  70
  71/*
  72 * Opcode effective-address decode tables.
  73 * Note that we only emulate instructions that have at least one memory
  74 * operand (excluding implicit stack references). We assume that stack
  75 * references and instruction fetches will never occur in special memory
  76 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
  77 * not be handled.
  78 */
  79
  80/* Operand sizes: 8-bit operands or specified/overridden size. */
  81#define ByteOp      (1<<0)      /* 8-bit operands. */
  82/* Destination operand type. */
  83#define DstShift    1
  84#define ImplicitOps (OpImplicit << DstShift)
  85#define DstReg      (OpReg << DstShift)
  86#define DstMem      (OpMem << DstShift)
  87#define DstAcc      (OpAcc << DstShift)
  88#define DstDI       (OpDI << DstShift)
  89#define DstMem64    (OpMem64 << DstShift)
  90#define DstMem16    (OpMem16 << DstShift)
  91#define DstImmUByte (OpImmUByte << DstShift)
  92#define DstDX       (OpDX << DstShift)
  93#define DstAccLo    (OpAccLo << DstShift)
  94#define DstMask     (OpMask << DstShift)
  95/* Source operand type. */
  96#define SrcShift    6
  97#define SrcNone     (OpNone << SrcShift)
  98#define SrcReg      (OpReg << SrcShift)
  99#define SrcMem      (OpMem << SrcShift)
 100#define SrcMem16    (OpMem16 << SrcShift)
 101#define SrcMem32    (OpMem32 << SrcShift)
 102#define SrcImm      (OpImm << SrcShift)
 103#define SrcImmByte  (OpImmByte << SrcShift)
 104#define SrcOne      (OpOne << SrcShift)
 105#define SrcImmUByte (OpImmUByte << SrcShift)
 106#define SrcImmU     (OpImmU << SrcShift)
 107#define SrcSI       (OpSI << SrcShift)
 108#define SrcXLat     (OpXLat << SrcShift)
 109#define SrcImmFAddr (OpImmFAddr << SrcShift)
 110#define SrcMemFAddr (OpMemFAddr << SrcShift)
 111#define SrcAcc      (OpAcc << SrcShift)
 112#define SrcImmU16   (OpImmU16 << SrcShift)
 113#define SrcImm64    (OpImm64 << SrcShift)
 114#define SrcDX       (OpDX << SrcShift)
 115#define SrcMem8     (OpMem8 << SrcShift)
 116#define SrcAccHi    (OpAccHi << SrcShift)
 117#define SrcMask     (OpMask << SrcShift)
 118#define BitOp       (1<<11)
 119#define MemAbs      (1<<12)      /* Memory operand is absolute displacement */
 120#define String      (1<<13)     /* String instruction (rep capable) */
 121#define Stack       (1<<14)     /* Stack instruction (push/pop) */
 122#define GroupMask   (7<<15)     /* Opcode uses one of the group mechanisms */
 123#define Group       (1<<15)     /* Bits 3:5 of modrm byte extend opcode */
 124#define GroupDual   (2<<15)     /* Alternate decoding of mod == 3 */
 125#define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
 126#define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 127#define Escape      (5<<15)     /* Escape to coprocessor instruction */
 128#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
 129#define ModeDual    (7<<15)     /* Different instruction for 32/64 bit */
 130#define Sse         (1<<18)     /* SSE Vector instruction */
 131/* Generic ModRM decode. */
 132#define ModRM       (1<<19)
 133/* Destination is only written; never read. */
 134#define Mov         (1<<20)
 135/* Misc flags */
 136#define Prot        (1<<21) /* instruction generates #UD if not in prot-mode */
 137#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
 138#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
 139#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
 140#define Undefined   (1<<25) /* No Such Instruction */
 141#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
 142#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 143#define No64        (1<<28)
 144#define PageTable   (1 << 29)   /* instruction used to write page table */
 145#define NotImpl     (1 << 30)   /* instruction is not implemented */
 146/* Source 2 operand type */
 147#define Src2Shift   (31)
 148#define Src2None    (OpNone << Src2Shift)
 149#define Src2Mem     (OpMem << Src2Shift)
 150#define Src2CL      (OpCL << Src2Shift)
 151#define Src2ImmByte (OpImmByte << Src2Shift)
 152#define Src2One     (OpOne << Src2Shift)
 153#define Src2Imm     (OpImm << Src2Shift)
 154#define Src2ES      (OpES << Src2Shift)
 155#define Src2CS      (OpCS << Src2Shift)
 156#define Src2SS      (OpSS << Src2Shift)
 157#define Src2DS      (OpDS << Src2Shift)
 158#define Src2FS      (OpFS << Src2Shift)
 159#define Src2GS      (OpGS << Src2Shift)
 160#define Src2Mask    (OpMask << Src2Shift)
 161#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
 162#define AlignMask   ((u64)7 << 41)
 163#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
 164#define Unaligned   ((u64)2 << 41)  /* Explicitly unaligned (e.g. MOVDQU) */
 165#define Avx         ((u64)3 << 41)  /* Advanced Vector Extensions */
 166#define Aligned16   ((u64)4 << 41)  /* Aligned to 16 byte boundary (e.g. FXSAVE) */
 167#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
 168#define NoWrite     ((u64)1 << 45)  /* No writeback */
 169#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
 170#define NoMod       ((u64)1 << 47)  /* Mod field is ignored */
 171#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
 172#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 173#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
 174#define NearBranch  ((u64)1 << 52)  /* Near branches */
 175#define No16        ((u64)1 << 53)  /* No 16 bit operand */
 176#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
  177#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
 178
 179#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
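/*
 * For orientation: the decoder later unpacks the three operand descriptors
 * from this packed flag word roughly as
 *
 *	unsigned dst  = (ctxt->d >> DstShift)  & OpMask;
 *	unsigned src  = (ctxt->d >> SrcShift)  & OpMask;
 *	unsigned src2 = (ctxt->d >> Src2Shift) & OpMask;
 *
 * each yielding one of the Op* codes defined above.
 */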
 180
 181#define X2(x...) x, x
 182#define X3(x...) X2(x), x
 183#define X4(x...) X2(x), X2(x)
 184#define X5(x...) X4(x), x
 185#define X6(x...) X4(x), X2(x)
 186#define X7(x...) X4(x), X3(x)
 187#define X8(x...) X4(x), X4(x)
 188#define X16(x...) X8(x), X8(x)
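/*
 * E.g. X4(x) expands to "x, x, x, x"; these repetition helpers stamp out
 * runs of identical entries in the opcode tables later in this file.
 */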
 189
 190#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
 191#define FASTOP_SIZE 8
 192
 193struct opcode {
 194        u64 flags : 56;
 195        u64 intercept : 8;
 196        union {
 197                int (*execute)(struct x86_emulate_ctxt *ctxt);
 198                const struct opcode *group;
 199                const struct group_dual *gdual;
 200                const struct gprefix *gprefix;
 201                const struct escape *esc;
 202                const struct instr_dual *idual;
 203                const struct mode_dual *mdual;
 204                void (*fastop)(struct fastop *fake);
 205        } u;
 206        int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 207};
 208
 209struct group_dual {
 210        struct opcode mod012[8];
 211        struct opcode mod3[8];
 212};
 213
 214struct gprefix {
 215        struct opcode pfx_no;
 216        struct opcode pfx_66;
 217        struct opcode pfx_f2;
 218        struct opcode pfx_f3;
 219};
 220
 221struct escape {
 222        struct opcode op[8];
 223        struct opcode high[64];
 224};
 225
 226struct instr_dual {
 227        struct opcode mod012;
 228        struct opcode mod3;
 229};
 230
 231struct mode_dual {
 232        struct opcode mode32;
 233        struct opcode mode64;
 234};
 235
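/*
 * Purely illustrative: a table entry for a hypothetical ALU instruction
 * with a ModRM-encoded memory source and a register destination would
 * pack its decode properties and handler as
 *
 *	static const struct opcode example = {
 *		.flags     = DstReg | SrcMem | ModRM,
 *		.u.execute = em_example,
 *	};
 *
 * where em_example stands in for a real emulation handler; the actual
 * tables later in this file are built with helper macros rather than
 * open-coded initializers.
 */
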
 236#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 237
 238enum x86_transfer_type {
 239        X86_TRANSFER_NONE,
 240        X86_TRANSFER_CALL_JMP,
 241        X86_TRANSFER_RET,
 242        X86_TRANSFER_TASK_SWITCH,
 243};
 244
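/*
 * GPRs are accessed lazily: reg_read() pulls a register from the vCPU into
 * ctxt->_regs[] on first use and marks it valid, while reg_write()/reg_rmw()
 * also mark it dirty so that writeback_registers() only writes back the
 * registers the emulated instruction actually touched.
 */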
 245static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
 246{
 247        if (!(ctxt->regs_valid & (1 << nr))) {
 248                ctxt->regs_valid |= 1 << nr;
 249                ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
 250        }
 251        return ctxt->_regs[nr];
 252}
 253
 254static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
 255{
 256        ctxt->regs_valid |= 1 << nr;
 257        ctxt->regs_dirty |= 1 << nr;
 258        return &ctxt->_regs[nr];
 259}
 260
 261static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
 262{
 263        reg_read(ctxt, nr);
 264        return reg_write(ctxt, nr);
 265}
 266
 267static void writeback_registers(struct x86_emulate_ctxt *ctxt)
 268{
 269        unsigned reg;
 270
 271        for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
 272                ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
 273}
 274
 275static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
 276{
 277        ctxt->regs_dirty = 0;
 278        ctxt->regs_valid = 0;
 279}
 280
 281/*
 282 * These EFLAGS bits are restored from saved value during emulation, and
 283 * any changes are written back to the saved value after emulation.
 284 */
 285#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
 286                     X86_EFLAGS_PF|X86_EFLAGS_CF)
 287
 288#ifdef CONFIG_X86_64
 289#define ON64(x) x
 290#else
 291#define ON64(x)
 292#endif
 293
 294/*
 295 * fastop functions have a special calling convention:
 296 *
 297 * dst:    rax        (in/out)
 298 * src:    rdx        (in/out)
 299 * src2:   rcx        (in)
 300 * flags:  rflags     (in/out)
 301 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 302 *
 303 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 304 * different operand sizes can be reached by calculation, rather than a jump
 305 * table (which would be bigger than the code).
 306 */
 307static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
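
/*
 * A simplified sketch of that dispatch: the table entry points at the
 * byte-sized stub, and the wider variants sit at fixed FASTOP_SIZE strides
 * behind it, so fastop() (defined later in this file) does roughly
 *
 *	if (!(ctxt->d & ByteOp))
 *		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 *
 * e.g. a 4-byte destination (__ffs(4) == 2) selects the third stub, the
 * "l" form emitted by the FASTOP* macros below.
 */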
 308
 309#define __FOP_FUNC(name) \
 310        ".align " __stringify(FASTOP_SIZE) " \n\t" \
 311        ".type " name ", @function \n\t" \
 312        name ":\n\t"
 313
 314#define FOP_FUNC(name) \
 315        __FOP_FUNC(#name)
 316
 317#define __FOP_RET(name) \
 318        "ret \n\t" \
 319        ".size " name ", .-" name "\n\t"
 320
 321#define FOP_RET(name) \
 322        __FOP_RET(#name)
 323
 324#define FOP_START(op) \
 325        extern void em_##op(struct fastop *fake); \
 326        asm(".pushsection .text, \"ax\" \n\t" \
 327            ".global em_" #op " \n\t" \
 328            ".align " __stringify(FASTOP_SIZE) " \n\t" \
 329            "em_" #op ":\n\t"
 330
 331#define FOP_END \
 332            ".popsection")
 333
 334#define __FOPNOP(name) \
 335        __FOP_FUNC(name) \
 336        __FOP_RET(name)
 337
 338#define FOPNOP() \
 339        __FOPNOP(__stringify(__UNIQUE_ID(nop)))
 340
 341#define FOP1E(op,  dst) \
 342        __FOP_FUNC(#op "_" #dst) \
 343        "10: " #op " %" #dst " \n\t" \
 344        __FOP_RET(#op "_" #dst)
 345
 346#define FOP1EEX(op,  dst) \
 347        FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
 348
 349#define FASTOP1(op) \
 350        FOP_START(op) \
 351        FOP1E(op##b, al) \
 352        FOP1E(op##w, ax) \
 353        FOP1E(op##l, eax) \
 354        ON64(FOP1E(op##q, rax)) \
 355        FOP_END
 356
 357/* 1-operand, using src2 (for MUL/DIV r/m) */
 358#define FASTOP1SRC2(op, name) \
 359        FOP_START(name) \
 360        FOP1E(op, cl) \
 361        FOP1E(op, cx) \
 362        FOP1E(op, ecx) \
 363        ON64(FOP1E(op, rcx)) \
 364        FOP_END
 365
 366/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
 367#define FASTOP1SRC2EX(op, name) \
 368        FOP_START(name) \
 369        FOP1EEX(op, cl) \
 370        FOP1EEX(op, cx) \
 371        FOP1EEX(op, ecx) \
 372        ON64(FOP1EEX(op, rcx)) \
 373        FOP_END
 374
 375#define FOP2E(op,  dst, src)       \
 376        __FOP_FUNC(#op "_" #dst "_" #src) \
 377        #op " %" #src ", %" #dst " \n\t" \
 378        __FOP_RET(#op "_" #dst "_" #src)
 379
 380#define FASTOP2(op) \
 381        FOP_START(op) \
 382        FOP2E(op##b, al, dl) \
 383        FOP2E(op##w, ax, dx) \
 384        FOP2E(op##l, eax, edx) \
 385        ON64(FOP2E(op##q, rax, rdx)) \
 386        FOP_END
 387
 388/* 2 operand, word only */
 389#define FASTOP2W(op) \
 390        FOP_START(op) \
 391        FOPNOP() \
 392        FOP2E(op##w, ax, dx) \
 393        FOP2E(op##l, eax, edx) \
 394        ON64(FOP2E(op##q, rax, rdx)) \
 395        FOP_END
 396
 397/* 2 operand, src is CL */
 398#define FASTOP2CL(op) \
 399        FOP_START(op) \
 400        FOP2E(op##b, al, cl) \
 401        FOP2E(op##w, ax, cl) \
 402        FOP2E(op##l, eax, cl) \
 403        ON64(FOP2E(op##q, rax, cl)) \
 404        FOP_END
 405
 406/* 2 operand, src and dest are reversed */
 407#define FASTOP2R(op, name) \
 408        FOP_START(name) \
 409        FOP2E(op##b, dl, al) \
 410        FOP2E(op##w, dx, ax) \
 411        FOP2E(op##l, edx, eax) \
 412        ON64(FOP2E(op##q, rdx, rax)) \
 413        FOP_END
 414
 415#define FOP3E(op,  dst, src, src2) \
 416        __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
 417        #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
 418        __FOP_RET(#op "_" #dst "_" #src "_" #src2)
 419
 420/* 3-operand, word-only, src2=cl */
 421#define FASTOP3WCL(op) \
 422        FOP_START(op) \
 423        FOPNOP() \
 424        FOP3E(op##w, ax, dx, cl) \
 425        FOP3E(op##l, eax, edx, cl) \
 426        ON64(FOP3E(op##q, rax, rdx, cl)) \
 427        FOP_END
 428
 429/* Special case for SETcc - 1 instruction per cc */
 430#define FOP_SETCC(op) \
 431        ".align 4 \n\t" \
 432        ".type " #op ", @function \n\t" \
 433        #op ": \n\t" \
 434        #op " %al \n\t" \
 435        __FOP_RET(#op)
 436
 437asm(".pushsection .fixup, \"ax\"\n"
 438    "kvm_fastop_exception: xor %esi, %esi; ret\n"
 439    ".popsection");
 440
 441FOP_START(setcc)
 442FOP_SETCC(seto)
 443FOP_SETCC(setno)
 444FOP_SETCC(setc)
 445FOP_SETCC(setnc)
 446FOP_SETCC(setz)
 447FOP_SETCC(setnz)
 448FOP_SETCC(setbe)
 449FOP_SETCC(setnbe)
 450FOP_SETCC(sets)
 451FOP_SETCC(setns)
 452FOP_SETCC(setp)
 453FOP_SETCC(setnp)
 454FOP_SETCC(setl)
 455FOP_SETCC(setnl)
 456FOP_SETCC(setle)
 457FOP_SETCC(setnle)
 458FOP_END;
 459
 460FOP_START(salc)
 461FOP_FUNC(salc)
 462"pushf; sbb %al, %al; popf \n\t"
 463FOP_RET(salc)
 464FOP_END;
 465
 466/*
 467 * XXX: inoutclob user must know where the argument is being expanded.
 468 *      Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
 469 */
 470#define asm_safe(insn, inoutclob...) \
 471({ \
 472        int _fault = 0; \
 473 \
 474        asm volatile("1:" insn "\n" \
 475                     "2:\n" \
 476                     ".pushsection .fixup, \"ax\"\n" \
 477                     "3: movl $1, %[_fault]\n" \
 478                     "   jmp  2b\n" \
 479                     ".popsection\n" \
 480                     _ASM_EXTABLE(1b, 3b) \
 481                     : [_fault] "+qm"(_fault) inoutclob ); \
 482 \
 483        _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
 484})
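
/*
 * Typical use: wrap an FPU instruction that may fault so the fault becomes
 * an emulator error code instead of a host oops, e.g.
 *
 *	rc = asm_safe("fwait");
 *	if (rc != X86EMUL_CONTINUE)
 *		return rc;
 */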
 485
 486static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
 487                                    enum x86_intercept intercept,
 488                                    enum x86_intercept_stage stage)
 489{
 490        struct x86_instruction_info info = {
 491                .intercept  = intercept,
 492                .rep_prefix = ctxt->rep_prefix,
 493                .modrm_mod  = ctxt->modrm_mod,
 494                .modrm_reg  = ctxt->modrm_reg,
 495                .modrm_rm   = ctxt->modrm_rm,
 496                .src_val    = ctxt->src.val64,
 497                .dst_val    = ctxt->dst.val64,
 498                .src_bytes  = ctxt->src.bytes,
 499                .dst_bytes  = ctxt->dst.bytes,
 500                .ad_bytes   = ctxt->ad_bytes,
 501                .next_rip   = ctxt->eip,
 502        };
 503
 504        return ctxt->ops->intercept(ctxt, &info, stage);
 505}
 506
 507static void assign_masked(ulong *dest, ulong src, ulong mask)
 508{
 509        *dest = (*dest & ~mask) | (src & mask);
 510}
 511
 512static void assign_register(unsigned long *reg, u64 val, int bytes)
 513{
 514        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
 515        switch (bytes) {
 516        case 1:
 517                *(u8 *)reg = (u8)val;
 518                break;
 519        case 2:
 520                *(u16 *)reg = (u16)val;
 521                break;
 522        case 4:
 523                *reg = (u32)val;
 524                break;  /* 64b: zero-extend */
 525        case 8:
 526                *reg = val;
 527                break;
 528        }
 529}
 530
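/*
 * ad_mask() is the wrap-around mask for the current effective address size:
 * 0xffff for 16-bit and 0xffffffff for 32-bit addressing.  The full-width
 * case never reaches the shift, because address_mask() below returns the
 * register unmasked when ad_bytes == sizeof(unsigned long).
 */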
 531static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
 532{
 533        return (1UL << (ctxt->ad_bytes << 3)) - 1;
 534}
 535
 536static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
 537{
 538        u16 sel;
 539        struct desc_struct ss;
 540
 541        if (ctxt->mode == X86EMUL_MODE_PROT64)
 542                return ~0UL;
 543        ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
 544        return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
 545}
 546
 547static int stack_size(struct x86_emulate_ctxt *ctxt)
 548{
 549        return (__fls(stack_mask(ctxt)) + 1) >> 3;
 550}
 551
 552/* Access/update address held in a register, based on addressing mode. */
 553static inline unsigned long
 554address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 555{
 556        if (ctxt->ad_bytes == sizeof(unsigned long))
 557                return reg;
 558        else
 559                return reg & ad_mask(ctxt);
 560}
 561
 562static inline unsigned long
 563register_address(struct x86_emulate_ctxt *ctxt, int reg)
 564{
 565        return address_mask(ctxt, reg_read(ctxt, reg));
 566}
 567
 568static void masked_increment(ulong *reg, ulong mask, int inc)
 569{
 570        assign_masked(reg, *reg + inc, mask);
 571}
 572
 573static inline void
 574register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
 575{
 576        ulong *preg = reg_rmw(ctxt, reg);
 577
 578        assign_register(preg, *preg + inc, ctxt->ad_bytes);
 579}
 580
 581static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 582{
 583        masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 584}
 585
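/*
 * With the granularity bit set, the descriptor limit is in 4KiB units, so
 * the byte-granular limit is (limit << 12) | 0xfff.
 */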
 586static u32 desc_limit_scaled(struct desc_struct *desc)
 587{
 588        u32 limit = get_desc_limit(desc);
 589
 590        return desc->g ? (limit << 12) | 0xfff : limit;
 591}
 592
 593static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
 594{
 595        if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
 596                return 0;
 597
 598        return ctxt->ops->get_cached_segment_base(ctxt, seg);
 599}
 600
 601static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
 602                             u32 error, bool valid)
 603{
 604        WARN_ON(vec > 0x1f);
 605        ctxt->exception.vector = vec;
 606        ctxt->exception.error_code = error;
 607        ctxt->exception.error_code_valid = valid;
 608        return X86EMUL_PROPAGATE_FAULT;
 609}
 610
 611static int emulate_db(struct x86_emulate_ctxt *ctxt)
 612{
 613        return emulate_exception(ctxt, DB_VECTOR, 0, false);
 614}
 615
 616static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
 617{
 618        return emulate_exception(ctxt, GP_VECTOR, err, true);
 619}
 620
 621static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
 622{
 623        return emulate_exception(ctxt, SS_VECTOR, err, true);
 624}
 625
 626static int emulate_ud(struct x86_emulate_ctxt *ctxt)
 627{
 628        return emulate_exception(ctxt, UD_VECTOR, 0, false);
 629}
 630
 631static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
 632{
 633        return emulate_exception(ctxt, TS_VECTOR, err, true);
 634}
 635
 636static int emulate_de(struct x86_emulate_ctxt *ctxt)
 637{
 638        return emulate_exception(ctxt, DE_VECTOR, 0, false);
 639}
 640
 641static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 642{
 643        return emulate_exception(ctxt, NM_VECTOR, 0, false);
 644}
 645
 646static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 647{
 648        u16 selector;
 649        struct desc_struct desc;
 650
 651        ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
 652        return selector;
 653}
 654
 655static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
 656                                 unsigned seg)
 657{
 658        u16 dummy;
 659        u32 base3;
 660        struct desc_struct desc;
 661
 662        ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
 663        ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
 664}
 665
 666static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
 667{
 668        return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
 669}
 670
 671static inline bool emul_is_noncanonical_address(u64 la,
 672                                                struct x86_emulate_ctxt *ctxt)
 673{
 674        return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la;
 675}
 676
 677/*
 678 * x86 defines three classes of vector instructions: explicitly
 679 * aligned, explicitly unaligned, and the rest, which change behaviour
 680 * depending on whether they're AVX encoded or not.
 681 *
 682 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 683 * subject to the same check.  FXSAVE and FXRSTOR are checked here too as their
 684 * 512 bytes of data must be aligned to a 16 byte boundary.
 685 */
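/*
 * For example: a 16-byte MOVDQA access (Aligned) must be 16-byte aligned,
 * MOVDQU (Unaligned) is never alignment-checked here, and FXSAVE's 512-byte
 * area (Aligned16) only needs 16-byte alignment despite its size.
 */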
 686static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
 687{
 688        u64 alignment = ctxt->d & AlignMask;
 689
 690        if (likely(size < 16))
 691                return 1;
 692
 693        switch (alignment) {
 694        case Unaligned:
 695        case Avx:
 696                return 1;
 697        case Aligned16:
 698                return 16;
 699        case Aligned:
 700        default:
 701                return size;
 702        }
 703}
 704
 705static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 706                                       struct segmented_address addr,
 707                                       unsigned *max_size, unsigned size,
 708                                       bool write, bool fetch,
 709                                       enum x86emul_mode mode, ulong *linear)
 710{
 711        struct desc_struct desc;
 712        bool usable;
 713        ulong la;
 714        u32 lim;
 715        u16 sel;
 716        u8  va_bits;
 717
 718        la = seg_base(ctxt, addr.seg) + addr.ea;
 719        *max_size = 0;
 720        switch (mode) {
 721        case X86EMUL_MODE_PROT64:
 722                *linear = la;
 723                va_bits = ctxt_virt_addr_bits(ctxt);
 724                if (get_canonical(la, va_bits) != la)
 725                        goto bad;
 726
 727                *max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
 728                if (size > *max_size)
 729                        goto bad;
 730                break;
 731        default:
 732                *linear = la = (u32)la;
 733                usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
 734                                                addr.seg);
 735                if (!usable)
 736                        goto bad;
 737                /* code segment in protected mode or read-only data segment */
 738                if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
 739                                        || !(desc.type & 2)) && write)
 740                        goto bad;
 741                /* unreadable code segment */
 742                if (!fetch && (desc.type & 8) && !(desc.type & 2))
 743                        goto bad;
 744                lim = desc_limit_scaled(&desc);
 745                if (!(desc.type & 8) && (desc.type & 4)) {
 746                        /* expand-down segment */
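                        /* valid offsets are (limit, 0xffff] or (limit, 0xffffffff] if desc.d */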
 747                        if (addr.ea <= lim)
 748                                goto bad;
 749                        lim = desc.d ? 0xffffffff : 0xffff;
 750                }
 751                if (addr.ea > lim)
 752                        goto bad;
 753                if (lim == 0xffffffff)
 754                        *max_size = ~0u;
 755                else {
 756                        *max_size = (u64)lim + 1 - addr.ea;
 757                        if (size > *max_size)
 758                                goto bad;
 759                }
 760                break;
 761        }
 762        if (la & (insn_alignment(ctxt, size) - 1))
 763                return emulate_gp(ctxt, 0);
 764        return X86EMUL_CONTINUE;
 765bad:
 766        if (addr.seg == VCPU_SREG_SS)
 767                return emulate_ss(ctxt, 0);
 768        else
 769                return emulate_gp(ctxt, 0);
 770}
 771
 772static int linearize(struct x86_emulate_ctxt *ctxt,
 773                     struct segmented_address addr,
 774                     unsigned size, bool write,
 775                     ulong *linear)
 776{
 777        unsigned max_size;
 778        return __linearize(ctxt, addr, &max_size, size, write, false,
 779                           ctxt->mode, linear);
 780}
 781
 782static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
 783                             enum x86emul_mode mode)
 784{
 785        ulong linear;
 786        int rc;
 787        unsigned max_size;
 788        struct segmented_address addr = { .seg = VCPU_SREG_CS,
 789                                           .ea = dst };
 790
 791        if (ctxt->op_bytes != sizeof(unsigned long))
 792                addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
 793        rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
 794        if (rc == X86EMUL_CONTINUE)
 795                ctxt->_eip = addr.ea;
 796        return rc;
 797}
 798
 799static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
 800{
 801        return assign_eip(ctxt, dst, ctxt->mode);
 802}
 803
 804static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
 805                          const struct desc_struct *cs_desc)
 806{
 807        enum x86emul_mode mode = ctxt->mode;
 808        int rc;
 809
 810#ifdef CONFIG_X86_64
 811        if (ctxt->mode >= X86EMUL_MODE_PROT16) {
 812                if (cs_desc->l) {
 813                        u64 efer = 0;
 814
 815                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 816                        if (efer & EFER_LMA)
 817                                mode = X86EMUL_MODE_PROT64;
 818                } else
 819                        mode = X86EMUL_MODE_PROT32; /* temporary value */
 820        }
 821#endif
 822        if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
 823                mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
 824        rc = assign_eip(ctxt, dst, mode);
 825        if (rc == X86EMUL_CONTINUE)
 826                ctxt->mode = mode;
 827        return rc;
 828}
 829
 830static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
 831{
 832        return assign_eip_near(ctxt, ctxt->_eip + rel);
 833}
 834
 835static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
 836                              void *data, unsigned size)
 837{
 838        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
 839}
 840
 841static int linear_write_system(struct x86_emulate_ctxt *ctxt,
 842                               ulong linear, void *data,
 843                               unsigned int size)
 844{
 845        return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
 846}
 847
 848static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 849                              struct segmented_address addr,
 850                              void *data,
 851                              unsigned size)
 852{
 853        int rc;
 854        ulong linear;
 855
 856        rc = linearize(ctxt, addr, size, false, &linear);
 857        if (rc != X86EMUL_CONTINUE)
 858                return rc;
 859        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
 860}
 861
 862static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
 863                               struct segmented_address addr,
 864                               void *data,
 865                               unsigned int size)
 866{
 867        int rc;
 868        ulong linear;
 869
 870        rc = linearize(ctxt, addr, size, true, &linear);
 871        if (rc != X86EMUL_CONTINUE)
 872                return rc;
 873        return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
 874}
 875
 876/*
  877 * Prefetch the remaining bytes of the instruction without crossing a page
 878 * boundary if they are not in fetch_cache yet.
 879 */
 880static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 881{
 882        int rc;
 883        unsigned size, max_size;
 884        unsigned long linear;
 885        int cur_size = ctxt->fetch.end - ctxt->fetch.data;
 886        struct segmented_address addr = { .seg = VCPU_SREG_CS,
 887                                           .ea = ctxt->eip + cur_size };
 888
 889        /*
 890         * We do not know exactly how many bytes will be needed, and
 891         * __linearize is expensive, so fetch as much as possible.  We
 892         * just have to avoid going beyond the 15 byte limit, the end
 893         * of the segment, or the end of the page.
 894         *
 895         * __linearize is called with size 0 so that it does not do any
 896         * boundary check itself.  Instead, we use max_size to check
 897         * against op_size.
 898         */
 899        rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
 900                         &linear);
 901        if (unlikely(rc != X86EMUL_CONTINUE))
 902                return rc;
 903
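        /*
         * cur_size is at most 15, so "15UL ^ cur_size" is simply
         * 15 - cur_size: the number of instruction bytes still allowed.
         */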
 904        size = min_t(unsigned, 15UL ^ cur_size, max_size);
 905        size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
 906
 907        /*
 908         * One instruction can only straddle two pages,
 909         * and one has been loaded at the beginning of
  910         * x86_decode_insn.  So, if there still are not enough
  911         * bytes, we must have hit the 15-byte limit.
 912         */
 913        if (unlikely(size < op_size))
 914                return emulate_gp(ctxt, 0);
 915
 916        rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
 917                              size, &ctxt->exception);
 918        if (unlikely(rc != X86EMUL_CONTINUE))
 919                return rc;
 920        ctxt->fetch.end += size;
 921        return X86EMUL_CONTINUE;
 922}
 923
 924static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
 925                                               unsigned size)
 926{
 927        unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
 928
 929        if (unlikely(done_size < size))
 930                return __do_insn_fetch_bytes(ctxt, size - done_size);
 931        else
 932                return X86EMUL_CONTINUE;
 933}
 934
 935/* Fetch next part of the instruction being emulated. */
 936#define insn_fetch(_type, _ctxt)                                        \
 937({      _type _x;                                                       \
 938                                                                        \
 939        rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));                 \
 940        if (rc != X86EMUL_CONTINUE)                                     \
 941                goto done;                                              \
 942        ctxt->_eip += sizeof(_type);                                    \
 943        memcpy(&_x, ctxt->fetch.ptr, sizeof(_type));                    \
 944        ctxt->fetch.ptr += sizeof(_type);                               \
 945        _x;                                                             \
 946})
 947
 948#define insn_fetch_arr(_arr, _size, _ctxt)                              \
 949({                                                                      \
 950        rc = do_insn_fetch_bytes(_ctxt, _size);                         \
 951        if (rc != X86EMUL_CONTINUE)                                     \
 952                goto done;                                              \
 953        ctxt->_eip += (_size);                                          \
 954        memcpy(_arr, ctxt->fetch.ptr, _size);                           \
 955        ctxt->fetch.ptr += (_size);                                     \
 956})
 957
 958/*
 959 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 960 * pointer into the block that addresses the relevant register.
  961 * AH/CH/DH/BH are decoded only for byte operands without a REX prefix.
 962 */
 963static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
 964                             int byteop)
 965{
 966        void *p;
 967        int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
 968
 969        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
 970                p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
 971        else
 972                p = reg_rmw(ctxt, modrm_reg);
 973        return p;
 974}
 975
 976static int read_descriptor(struct x86_emulate_ctxt *ctxt,
 977                           struct segmented_address addr,
 978                           u16 *size, unsigned long *address, int op_bytes)
 979{
 980        int rc;
 981
 982        if (op_bytes == 2)
 983                op_bytes = 3;
 984        *address = 0;
 985        rc = segmented_read_std(ctxt, addr, size, 2);
 986        if (rc != X86EMUL_CONTINUE)
 987                return rc;
 988        addr.ea += 2;
 989        rc = segmented_read_std(ctxt, addr, address, op_bytes);
 990        return rc;
 991}
 992
 993FASTOP2(add);
 994FASTOP2(or);
 995FASTOP2(adc);
 996FASTOP2(sbb);
 997FASTOP2(and);
 998FASTOP2(sub);
 999FASTOP2(xor);
1000FASTOP2(cmp);
1001FASTOP2(test);
1002
1003FASTOP1SRC2(mul, mul_ex);
1004FASTOP1SRC2(imul, imul_ex);
1005FASTOP1SRC2EX(div, div_ex);
1006FASTOP1SRC2EX(idiv, idiv_ex);
1007
1008FASTOP3WCL(shld);
1009FASTOP3WCL(shrd);
1010
1011FASTOP2W(imul);
1012
1013FASTOP1(not);
1014FASTOP1(neg);
1015FASTOP1(inc);
1016FASTOP1(dec);
1017
1018FASTOP2CL(rol);
1019FASTOP2CL(ror);
1020FASTOP2CL(rcl);
1021FASTOP2CL(rcr);
1022FASTOP2CL(shl);
1023FASTOP2CL(shr);
1024FASTOP2CL(sar);
1025
1026FASTOP2W(bsf);
1027FASTOP2W(bsr);
1028FASTOP2W(bt);
1029FASTOP2W(bts);
1030FASTOP2W(btr);
1031FASTOP2W(btc);
1032
1033FASTOP2(xadd);
1034
1035FASTOP2R(cmp, cmp_r);
1036
1037static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
1038{
1039        /* If src is zero, do not writeback, but update flags */
1040        if (ctxt->src.val == 0)
1041                ctxt->dst.type = OP_NONE;
1042        return fastop(ctxt, em_bsf);
1043}
1044
1045static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
1046{
1047        /* If src is zero, do not writeback, but update flags */
1048        if (ctxt->src.val == 0)
1049                ctxt->dst.type = OP_NONE;
1050        return fastop(ctxt, em_bsr);
1051}
1052
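/*
 * Each FOP_SETCC stub above is "setcc %al; ret", padded and aligned to 4
 * bytes, so the handler for condition code cc sits at em_setcc + 4 * cc;
 * test_cc() relies on that fixed stride.
 */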
1053static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1054{
1055        u8 rc;
1056        void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1057
1058        flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1059        asm("push %[flags]; popf; " CALL_NOSPEC
1060            : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
1061        return rc;
1062}
1063
1064static void fetch_register_operand(struct operand *op)
1065{
1066        switch (op->bytes) {
1067        case 1:
1068                op->val = *(u8 *)op->addr.reg;
1069                break;
1070        case 2:
1071                op->val = *(u16 *)op->addr.reg;
1072                break;
1073        case 4:
1074                op->val = *(u32 *)op->addr.reg;
1075                break;
1076        case 8:
1077                op->val = *(u64 *)op->addr.reg;
1078                break;
1079        }
1080}
1081
1082static int em_fninit(struct x86_emulate_ctxt *ctxt)
1083{
1084        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1085                return emulate_nm(ctxt);
1086
1087        kvm_fpu_get();
1088        asm volatile("fninit");
1089        kvm_fpu_put();
1090        return X86EMUL_CONTINUE;
1091}
1092
1093static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1094{
1095        u16 fcw;
1096
1097        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1098                return emulate_nm(ctxt);
1099
1100        kvm_fpu_get();
1101        asm volatile("fnstcw %0": "+m"(fcw));
1102        kvm_fpu_put();
1103
1104        ctxt->dst.val = fcw;
1105
1106        return X86EMUL_CONTINUE;
1107}
1108
1109static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1110{
1111        u16 fsw;
1112
1113        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1114                return emulate_nm(ctxt);
1115
1116        kvm_fpu_get();
1117        asm volatile("fnstsw %0": "+m"(fsw));
1118        kvm_fpu_put();
1119
1120        ctxt->dst.val = fsw;
1121
1122        return X86EMUL_CONTINUE;
1123}
1124
1125static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1126                                    struct operand *op)
1127{
1128        unsigned reg = ctxt->modrm_reg;
1129
1130        if (!(ctxt->d & ModRM))
1131                reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1132
1133        if (ctxt->d & Sse) {
1134                op->type = OP_XMM;
1135                op->bytes = 16;
1136                op->addr.xmm = reg;
1137                kvm_read_sse_reg(reg, &op->vec_val);
1138                return;
1139        }
1140        if (ctxt->d & Mmx) {
1141                reg &= 7;
1142                op->type = OP_MM;
1143                op->bytes = 8;
1144                op->addr.mm = reg;
1145                return;
1146        }
1147
1148        op->type = OP_REG;
1149        op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1150        op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1151
1152        fetch_register_operand(op);
1153        op->orig_val = op->val;
1154}
1155
1156static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1157{
1158        if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1159                ctxt->modrm_seg = VCPU_SREG_SS;
1160}
1161
1162static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1163                        struct operand *op)
1164{
1165        u8 sib;
1166        int index_reg, base_reg, scale;
1167        int rc = X86EMUL_CONTINUE;
1168        ulong modrm_ea = 0;
1169
1170        ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1171        index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1172        base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1173
1174        ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1175        ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1176        ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1177        ctxt->modrm_seg = VCPU_SREG_DS;
1178
1179        if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1180                op->type = OP_REG;
1181                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1182                op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1183                                ctxt->d & ByteOp);
1184                if (ctxt->d & Sse) {
1185                        op->type = OP_XMM;
1186                        op->bytes = 16;
1187                        op->addr.xmm = ctxt->modrm_rm;
1188                        kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
1189                        return rc;
1190                }
1191                if (ctxt->d & Mmx) {
1192                        op->type = OP_MM;
1193                        op->bytes = 8;
1194                        op->addr.mm = ctxt->modrm_rm & 7;
1195                        return rc;
1196                }
1197                fetch_register_operand(op);
1198                return rc;
1199        }
1200
1201        op->type = OP_MEM;
1202
1203        if (ctxt->ad_bytes == 2) {
1204                unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1205                unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1206                unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1207                unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1208
1209                /* 16-bit ModR/M decode. */
1210                switch (ctxt->modrm_mod) {
1211                case 0:
1212                        if (ctxt->modrm_rm == 6)
1213                                modrm_ea += insn_fetch(u16, ctxt);
1214                        break;
1215                case 1:
1216                        modrm_ea += insn_fetch(s8, ctxt);
1217                        break;
1218                case 2:
1219                        modrm_ea += insn_fetch(u16, ctxt);
1220                        break;
1221                }
1222                switch (ctxt->modrm_rm) {
1223                case 0:
1224                        modrm_ea += bx + si;
1225                        break;
1226                case 1:
1227                        modrm_ea += bx + di;
1228                        break;
1229                case 2:
1230                        modrm_ea += bp + si;
1231                        break;
1232                case 3:
1233                        modrm_ea += bp + di;
1234                        break;
1235                case 4:
1236                        modrm_ea += si;
1237                        break;
1238                case 5:
1239                        modrm_ea += di;
1240                        break;
1241                case 6:
1242                        if (ctxt->modrm_mod != 0)
1243                                modrm_ea += bp;
1244                        break;
1245                case 7:
1246                        modrm_ea += bx;
1247                        break;
1248                }
1249                if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1250                    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1251                        ctxt->modrm_seg = VCPU_SREG_SS;
1252                modrm_ea = (u16)modrm_ea;
1253        } else {
1254                /* 32/64-bit ModR/M decode. */
1255                if ((ctxt->modrm_rm & 7) == 4) {
1256                        sib = insn_fetch(u8, ctxt);
1257                        index_reg |= (sib >> 3) & 7;
1258                        base_reg |= sib & 7;
1259                        scale = sib >> 6;
1260
1261                        if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1262                                modrm_ea += insn_fetch(s32, ctxt);
1263                        else {
1264                                modrm_ea += reg_read(ctxt, base_reg);
1265                                adjust_modrm_seg(ctxt, base_reg);
1266                                /* Increment ESP on POP [ESP] */
1267                                if ((ctxt->d & IncSP) &&
1268                                    base_reg == VCPU_REGS_RSP)
1269                                        modrm_ea += ctxt->op_bytes;
1270                        }
1271                        if (index_reg != 4)
1272                                modrm_ea += reg_read(ctxt, index_reg) << scale;
1273                } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1274                        modrm_ea += insn_fetch(s32, ctxt);
1275                        if (ctxt->mode == X86EMUL_MODE_PROT64)
1276                                ctxt->rip_relative = 1;
1277                } else {
1278                        base_reg = ctxt->modrm_rm;
1279                        modrm_ea += reg_read(ctxt, base_reg);
1280                        adjust_modrm_seg(ctxt, base_reg);
1281                }
1282                switch (ctxt->modrm_mod) {
1283                case 1:
1284                        modrm_ea += insn_fetch(s8, ctxt);
1285                        break;
1286                case 2:
1287                        modrm_ea += insn_fetch(s32, ctxt);
1288                        break;
1289                }
1290        }
1291        op->addr.mem.ea = modrm_ea;
1292        if (ctxt->ad_bytes != 8)
1293                ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1294
1295done:
1296        return rc;
1297}
1298
1299static int decode_abs(struct x86_emulate_ctxt *ctxt,
1300                      struct operand *op)
1301{
1302        int rc = X86EMUL_CONTINUE;
1303
1304        op->type = OP_MEM;
1305        switch (ctxt->ad_bytes) {
1306        case 2:
1307                op->addr.mem.ea = insn_fetch(u16, ctxt);
1308                break;
1309        case 4:
1310                op->addr.mem.ea = insn_fetch(u32, ctxt);
1311                break;
1312        case 8:
1313                op->addr.mem.ea = insn_fetch(u64, ctxt);
1314                break;
1315        }
1316done:
1317        return rc;
1318}
1319
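/*
 * For BT/BTS/BTR/BTC with a register bit index and a memory destination,
 * the bit string is addressed relative to the r/m operand: fold the byte
 * portion of the (signed) bit offset into the effective address and keep
 * only the in-operand bit number in src.val.
 */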
1320static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1321{
1322        long sv = 0, mask;
1323
1324        if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1325                mask = ~((long)ctxt->dst.bytes * 8 - 1);
1326
1327                if (ctxt->src.bytes == 2)
1328                        sv = (s16)ctxt->src.val & (s16)mask;
1329                else if (ctxt->src.bytes == 4)
1330                        sv = (s32)ctxt->src.val & (s32)mask;
1331                else
1332                        sv = (s64)ctxt->src.val & (s64)mask;
1333
1334                ctxt->dst.addr.mem.ea = address_mask(ctxt,
1335                                           ctxt->dst.addr.mem.ea + (sv >> 3));
1336        }
1337
1338        /* only subword offset */
1339        ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1340}
1341
1342static int read_emulated(struct x86_emulate_ctxt *ctxt,
1343                         unsigned long addr, void *dest, unsigned size)
1344{
1345        int rc;
1346        struct read_cache *mc = &ctxt->mem_read;
1347
1348        if (mc->pos < mc->end)
1349                goto read_cached;
1350
1351        WARN_ON((mc->end + size) >= sizeof(mc->data));
1352
1353        rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1354                                      &ctxt->exception);
1355        if (rc != X86EMUL_CONTINUE)
1356                return rc;
1357
1358        mc->end += size;
1359
1360read_cached:
1361        memcpy(dest, mc->data + mc->pos, size);
1362        mc->pos += size;
1363        return X86EMUL_CONTINUE;
1364}
1365
1366static int segmented_read(struct x86_emulate_ctxt *ctxt,
1367                          struct segmented_address addr,
1368                          void *data,
1369                          unsigned size)
1370{
1371        int rc;
1372        ulong linear;
1373
1374        rc = linearize(ctxt, addr, size, false, &linear);
1375        if (rc != X86EMUL_CONTINUE)
1376                return rc;
1377        return read_emulated(ctxt, linear, data, size);
1378}
1379
1380static int segmented_write(struct x86_emulate_ctxt *ctxt,
1381                           struct segmented_address addr,
1382                           const void *data,
1383                           unsigned size)
1384{
1385        int rc;
1386        ulong linear;
1387
1388        rc = linearize(ctxt, addr, size, true, &linear);
1389        if (rc != X86EMUL_CONTINUE)
1390                return rc;
1391        return ctxt->ops->write_emulated(ctxt, linear, data, size,
1392                                         &ctxt->exception);
1393}
1394
1395static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1396                             struct segmented_address addr,
1397                             const void *orig_data, const void *data,
1398                             unsigned size)
1399{
1400        int rc;
1401        ulong linear;
1402
1403        rc = linearize(ctxt, addr, size, true, &linear);
1404        if (rc != X86EMUL_CONTINUE)
1405                return rc;
1406        return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1407                                           size, &ctxt->exception);
1408}
1409
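/*
 * IN/INS read-ahead: for a rep-prefixed INS, batch up to a buffer's worth
 * of port data into ctxt->io_read in one go and drain the cache on the
 * following iterations instead of exiting for every element.
 */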
1410static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1411                           unsigned int size, unsigned short port,
1412                           void *dest)
1413{
1414        struct read_cache *rc = &ctxt->io_read;
1415
1416        if (rc->pos == rc->end) { /* refill pio read ahead */
1417                unsigned int in_page, n;
1418                unsigned int count = ctxt->rep_prefix ?
1419                        address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1420                in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1421                        offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1422                        PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1423                n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1424                if (n == 0)
1425                        n = 1;
1426                rc->pos = rc->end = 0;
1427                if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1428                        return 0;
1429                rc->end = n * size;
1430        }
1431
1432        if (ctxt->rep_prefix && (ctxt->d & String) &&
1433            !(ctxt->eflags & X86_EFLAGS_DF)) {
1434                ctxt->dst.data = rc->data + rc->pos;
1435                ctxt->dst.type = OP_MEM_STR;
1436                ctxt->dst.count = (rc->end - rc->pos) / size;
1437                rc->pos = rc->end;
1438        } else {
1439                memcpy(dest, rc->data + rc->pos, size);
1440                rc->pos += size;
1441        }
1442        return 1;
1443}
1444
1445static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1446                                     u16 index, struct desc_struct *desc)
1447{
1448        struct desc_ptr dt;
1449        ulong addr;
1450
1451        ctxt->ops->get_idt(ctxt, &dt);
1452
1453        if (dt.size < index * 8 + 7)
1454                return emulate_gp(ctxt, index << 3 | 0x2);
1455
1456        addr = dt.address + index * 8;
1457        return linear_read_system(ctxt, addr, desc, sizeof(*desc));
1458}
1459
1460static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1461                                     u16 selector, struct desc_ptr *dt)
1462{
1463        const struct x86_emulate_ops *ops = ctxt->ops;
1464        u32 base3 = 0;
1465
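        /* Selector bit 2 is the TI bit: set = LDT, clear = GDT. */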
1466        if (selector & 1 << 2) {
1467                struct desc_struct desc;
1468                u16 sel;
1469
1470                memset(dt, 0, sizeof(*dt));
1471                if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1472                                      VCPU_SREG_LDTR))
1473                        return;
1474
1475                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1476                dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1477        } else
1478                ops->get_gdt(ctxt, dt);
1479}
1480
1481static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1482                              u16 selector, ulong *desc_addr_p)
1483{
1484        struct desc_ptr dt;
1485        u16 index = selector >> 3;
1486        ulong addr;
1487
1488        get_descriptor_table_ptr(ctxt, selector, &dt);
1489
1490        if (dt.size < index * 8 + 7)
1491                return emulate_gp(ctxt, selector & 0xfffc);
1492
1493        addr = dt.address + index * 8;
1494
1495#ifdef CONFIG_X86_64
1496        if (addr >> 32 != 0) {
1497                u64 efer = 0;
1498
1499                ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1500                if (!(efer & EFER_LMA))
1501                        addr &= (u32)-1;
1502        }
1503#endif
1504
1505        *desc_addr_p = addr;
1506        return X86EMUL_CONTINUE;
1507}
1508
 1509/* allowed just for 8-byte segment descriptors */
1510static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1511                                   u16 selector, struct desc_struct *desc,
1512                                   ulong *desc_addr_p)
1513{
1514        int rc;
1515
1516        rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1517        if (rc != X86EMUL_CONTINUE)
1518                return rc;
1519
1520        return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
1521}
1522
 1523/* allowed just for 8-byte segment descriptors */
1524static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1525                                    u16 selector, struct desc_struct *desc)
1526{
1527        int rc;
1528        ulong addr;
1529
1530        rc = get_descriptor_ptr(ctxt, selector, &addr);
1531        if (rc != X86EMUL_CONTINUE)
1532                return rc;
1533
1534        return linear_write_system(ctxt, addr, desc, sizeof(*desc));
1535}
1536
1537static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1538                                     u16 selector, int seg, u8 cpl,
1539                                     enum x86_transfer_type transfer,
1540                                     struct desc_struct *desc)
1541{
1542        struct desc_struct seg_desc, old_desc;
1543        u8 dpl, rpl;
1544        unsigned err_vec = GP_VECTOR;
1545        u32 err_code = 0;
1546        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1547        ulong desc_addr;
1548        int ret;
1549        u16 dummy;
1550        u32 base3 = 0;
1551
1552        memset(&seg_desc, 0, sizeof(seg_desc));
1553
1554        if (ctxt->mode == X86EMUL_MODE_REAL) {
1555                /* set real mode segment descriptor (keep limit etc. for
1556                 * unreal mode) */
1557                ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1558                set_desc_base(&seg_desc, selector << 4);
1559                goto load;
1560        } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1561                /* VM86 needs a clean new segment descriptor */
1562                set_desc_base(&seg_desc, selector << 4);
1563                set_desc_limit(&seg_desc, 0xffff);
1564                seg_desc.type = 3;
1565                seg_desc.p = 1;
1566                seg_desc.s = 1;
1567                seg_desc.dpl = 3;
1568                goto load;
1569        }
1570
1571        rpl = selector & 3;
1572
1573        /* TR should be in GDT only */
1574        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1575                goto exception;
1576
1577        /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1578        if (null_selector) {
1579                if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1580                        goto exception;
1581
1582                if (seg == VCPU_SREG_SS) {
1583                        if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1584                                goto exception;
1585
1586                        /*
1587                         * ctxt->ops->set_segment expects the CPL to be in
1588                         * SS.DPL, so fake an expand-up 32-bit data segment.
1589                         */
1590                        seg_desc.type = 3;
1591                        seg_desc.p = 1;
1592                        seg_desc.s = 1;
1593                        seg_desc.dpl = cpl;
1594                        seg_desc.d = 1;
1595                        seg_desc.g = 1;
1596                }
1597
1598                /* Skip all following checks */
1599                goto load;
1600        }
1601
1602        ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1603        if (ret != X86EMUL_CONTINUE)
1604                return ret;
1605
1606        err_code = selector & 0xfffc;
1607        err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1608                                                           GP_VECTOR;
1609
1610        /* A system descriptor cannot be loaded into an ordinary segment register */
1611        if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1612                if (transfer == X86_TRANSFER_CALL_JMP)
1613                        return X86EMUL_UNHANDLEABLE;
1614                goto exception;
1615        }
1616
1617        if (!seg_desc.p) {
1618                err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1619                goto exception;
1620        }
1621
1622        dpl = seg_desc.dpl;
1623
1624        switch (seg) {
1625        case VCPU_SREG_SS:
1626                /*
1627                 * Reject if the segment is not a writable data segment, or
1628                 * the selector's RPL != CPL, or the descriptor's DPL != CPL.
1629                 */
1630                if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1631                        goto exception;
1632                break;
1633        case VCPU_SREG_CS:
1634                if (!(seg_desc.type & 8))
1635                        goto exception;
1636
1637                if (seg_desc.type & 4) {
1638                        /* conforming */
1639                        if (dpl > cpl)
1640                                goto exception;
1641                } else {
1642                        /* nonconforming */
1643                        if (rpl > cpl || dpl != cpl)
1644                                goto exception;
1645                }
1646                /* In long mode, the D/B bit must be clear if the L bit is set */
1647                if (seg_desc.d && seg_desc.l) {
1648                        u64 efer = 0;
1649
1650                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1651                        if (efer & EFER_LMA)
1652                                goto exception;
1653                }
1654
1655                /* CS(RPL) <- CPL */
1656                selector = (selector & 0xfffc) | cpl;
1657                break;
1658        case VCPU_SREG_TR:
1659                if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1660                        goto exception;
1661                old_desc = seg_desc;
1662                seg_desc.type |= 2; /* busy */
1663                ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1664                                                  sizeof(seg_desc), &ctxt->exception);
1665                if (ret != X86EMUL_CONTINUE)
1666                        return ret;
1667                break;
1668        case VCPU_SREG_LDTR:
1669                if (seg_desc.s || seg_desc.type != 2)
1670                        goto exception;
1671                break;
1672        default: /*  DS, ES, FS, or GS */
1673                /*
1674                 * Reject if the segment is not a data or readable code
1675                 * segment, or if it is a data or nonconforming code segment
1676                 * and both RPL and CPL are greater than DPL.
1677                 */
1678                if ((seg_desc.type & 0xa) == 0x8 ||
1679                    (((seg_desc.type & 0xc) != 0xc) &&
1680                     (rpl > dpl && cpl > dpl)))
1681                        goto exception;
1682                break;
1683        }
1684
1685        if (seg_desc.s) {
1686                /* mark segment as accessed */
1687                if (!(seg_desc.type & 1)) {
1688                        seg_desc.type |= 1;
1689                        ret = write_segment_descriptor(ctxt, selector,
1690                                                       &seg_desc);
1691                        if (ret != X86EMUL_CONTINUE)
1692                                return ret;
1693                }
1694        } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1695                ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
1696                if (ret != X86EMUL_CONTINUE)
1697                        return ret;
1698                if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
1699                                ((u64)base3 << 32), ctxt))
1700                        return emulate_gp(ctxt, 0);
1701        }
1702load:
1703        ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1704        if (desc)
1705                *desc = seg_desc;
1706        return X86EMUL_CONTINUE;
1707exception:
1708        return emulate_exception(ctxt, err_vec, err_code, true);
1709}
1710
1711static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1712                                   u16 selector, int seg)
1713{
1714        u8 cpl = ctxt->ops->cpl(ctxt);
1715
1716        /*
1717         * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
1718         * they can load it at CPL<3 (Intel's manual says only LSS can,
1719         * but it's wrong).
1720         *
1721         * However, the Intel manual says that putting IST=1/DPL=3 in
1722         * an interrupt gate will result in SS=3 (the AMD manual instead
1723         * says it doesn't), so allow SS=3 in __load_segment_descriptor
1724         * and only forbid it here.
1725         */
1726        if (seg == VCPU_SREG_SS && selector == 3 &&
1727            ctxt->mode == X86EMUL_MODE_PROT64)
1728                return emulate_exception(ctxt, GP_VECTOR, 0, true);
1729
1730        return __load_segment_descriptor(ctxt, selector, seg, cpl,
1731                                         X86_TRANSFER_NONE, NULL);
1732}
1733
1734static void write_register_operand(struct operand *op)
1735{
1736        return assign_register(op->addr.reg, op->val, op->bytes);
1737}
1738
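    /*
     * Commit an instruction's result to its destination: a GPR, memory
     * (using CMPXCHG when the LOCK prefix is present), or an XMM/MMX
     * register.
     */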
1739static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1740{
1741        switch (op->type) {
1742        case OP_REG:
1743                write_register_operand(op);
1744                break;
1745        case OP_MEM:
1746                if (ctxt->lock_prefix)
1747                        return segmented_cmpxchg(ctxt,
1748                                                 op->addr.mem,
1749                                                 &op->orig_val,
1750                                                 &op->val,
1751                                                 op->bytes);
1752                else
1753                        return segmented_write(ctxt,
1754                                               op->addr.mem,
1755                                               &op->val,
1756                                               op->bytes);
1757                break;
1758        case OP_MEM_STR:
1759                return segmented_write(ctxt,
1760                                       op->addr.mem,
1761                                       op->data,
1762                                       op->bytes * op->count);
1763                break;
1764        case OP_XMM:
1765                kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
1766                break;
1767        case OP_MM:
1768                kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
1769                break;
1770        case OP_NONE:
1771                /* no writeback */
1772                break;
1773        default:
1774                break;
1775        }
1776        return X86EMUL_CONTINUE;
1777}
1778
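    /* Decrement RSP and write @bytes bytes of @data to the top of the stack. */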
1779static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1780{
1781        struct segmented_address addr;
1782
1783        rsp_increment(ctxt, -bytes);
1784        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1785        addr.seg = VCPU_SREG_SS;
1786
1787        return segmented_write(ctxt, addr, data, bytes);
1788}
1789
1790static int em_push(struct x86_emulate_ctxt *ctxt)
1791{
1792        /* Disable writeback. */
1793        ctxt->dst.type = OP_NONE;
1794        return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1795}
1796
1797static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1798                       void *dest, int len)
1799{
1800        int rc;
1801        struct segmented_address addr;
1802
1803        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1804        addr.seg = VCPU_SREG_SS;
1805        rc = segmented_read(ctxt, addr, dest, len);
1806        if (rc != X86EMUL_CONTINUE)
1807                return rc;
1808
1809        rsp_increment(ctxt, len);
1810        return rc;
1811}
1812
1813static int em_pop(struct x86_emulate_ctxt *ctxt)
1814{
1815        return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1816}
1817
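    /*
     * Pop EFLAGS, honouring the usual restrictions: IOPL may only change at
     * CPL 0, IF only when CPL <= IOPL, and VM86 with IOPL < 3 gets #GP.
     */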
1818static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1819                        void *dest, int len)
1820{
1821        int rc;
1822        unsigned long val, change_mask;
1823        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1824        int cpl = ctxt->ops->cpl(ctxt);
1825
1826        rc = emulate_pop(ctxt, &val, len);
1827        if (rc != X86EMUL_CONTINUE)
1828                return rc;
1829
1830        change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1831                      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1832                      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1833                      X86_EFLAGS_AC | X86_EFLAGS_ID;
1834
1835        switch(ctxt->mode) {
1836        case X86EMUL_MODE_PROT64:
1837        case X86EMUL_MODE_PROT32:
1838        case X86EMUL_MODE_PROT16:
1839                if (cpl == 0)
1840                        change_mask |= X86_EFLAGS_IOPL;
1841                if (cpl <= iopl)
1842                        change_mask |= X86_EFLAGS_IF;
1843                break;
1844        case X86EMUL_MODE_VM86:
1845                if (iopl < 3)
1846                        return emulate_gp(ctxt, 0);
1847                change_mask |= X86_EFLAGS_IF;
1848                break;
1849        default: /* real mode */
1850                change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1851                break;
1852        }
1853
1854        *(unsigned long *)dest =
1855                (ctxt->eflags & ~change_mask) | (val & change_mask);
1856
1857        return rc;
1858}
1859
1860static int em_popf(struct x86_emulate_ctxt *ctxt)
1861{
1862        ctxt->dst.type = OP_REG;
1863        ctxt->dst.addr.reg = &ctxt->eflags;
1864        ctxt->dst.bytes = ctxt->op_bytes;
1865        return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1866}
1867
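    /*
     * ENTER: push RBP and allocate the stack frame.  Only nesting level 0
     * is supported; other levels are reported as unhandleable.
     */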
1868static int em_enter(struct x86_emulate_ctxt *ctxt)
1869{
1870        int rc;
1871        unsigned frame_size = ctxt->src.val;
1872        unsigned nesting_level = ctxt->src2.val & 31;
1873        ulong rbp;
1874
1875        if (nesting_level)
1876                return X86EMUL_UNHANDLEABLE;
1877
1878        rbp = reg_read(ctxt, VCPU_REGS_RBP);
1879        rc = push(ctxt, &rbp, stack_size(ctxt));
1880        if (rc != X86EMUL_CONTINUE)
1881                return rc;
1882        assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1883                      stack_mask(ctxt));
1884        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1885                      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1886                      stack_mask(ctxt));
1887        return X86EMUL_CONTINUE;
1888}
1889
1890static int em_leave(struct x86_emulate_ctxt *ctxt)
1891{
1892        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1893                      stack_mask(ctxt));
1894        return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1895}
1896
1897static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1898{
1899        int seg = ctxt->src2.val;
1900
1901        ctxt->src.val = get_segment_selector(ctxt, seg);
1902        if (ctxt->op_bytes == 4) {
1903                rsp_increment(ctxt, -2);
1904                ctxt->op_bytes = 2;
1905        }
1906
1907        return em_push(ctxt);
1908}
1909
1910static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1911{
1912        int seg = ctxt->src2.val;
1913        unsigned long selector;
1914        int rc;
1915
1916        rc = emulate_pop(ctxt, &selector, 2);
1917        if (rc != X86EMUL_CONTINUE)
1918                return rc;
1919
1920        if (ctxt->modrm_reg == VCPU_SREG_SS)
1921                ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1922        if (ctxt->op_bytes > 2)
1923                rsp_increment(ctxt, ctxt->op_bytes - 2);
1924
1925        rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1926        return rc;
1927}
1928
1929static int em_pusha(struct x86_emulate_ctxt *ctxt)
1930{
1931        unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1932        int rc = X86EMUL_CONTINUE;
1933        int reg = VCPU_REGS_RAX;
1934
1935        while (reg <= VCPU_REGS_RDI) {
1936                ctxt->src.val = (reg == VCPU_REGS_RSP) ? old_esp :
1937                                reg_read(ctxt, reg);
1938
1939                rc = em_push(ctxt);
1940                if (rc != X86EMUL_CONTINUE)
1941                        return rc;
1942
1943                ++reg;
1944        }
1945
1946        return rc;
1947}
1948
1949static int em_pushf(struct x86_emulate_ctxt *ctxt)
1950{
1951        ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1952        return em_push(ctxt);
1953}
1954
1955static int em_popa(struct x86_emulate_ctxt *ctxt)
1956{
1957        int rc = X86EMUL_CONTINUE;
1958        int reg = VCPU_REGS_RDI;
1959        u32 val;
1960
1961        while (reg >= VCPU_REGS_RAX) {
1962                if (reg == VCPU_REGS_RSP) {
1963                        rsp_increment(ctxt, ctxt->op_bytes);
1964                        --reg;
1965                }
1966
1967                rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
1968                if (rc != X86EMUL_CONTINUE)
1969                        break;
1970                assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
1971                --reg;
1972        }
1973        return rc;
1974}
1975
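    /*
     * Real-mode interrupt delivery: push FLAGS, CS and IP, clear IF/TF/AC,
     * then fetch the new CS:IP from the IVT entry for @irq.
     */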
1976static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
1977{
1978        const struct x86_emulate_ops *ops = ctxt->ops;
1979        int rc;
1980        struct desc_ptr dt;
1981        gva_t cs_addr;
1982        gva_t eip_addr;
1983        u16 cs, eip;
1984
1985        /* TODO: Add limit checks */
1986        ctxt->src.val = ctxt->eflags;
1987        rc = em_push(ctxt);
1988        if (rc != X86EMUL_CONTINUE)
1989                return rc;
1990
1991        ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
1992
1993        ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
1994        rc = em_push(ctxt);
1995        if (rc != X86EMUL_CONTINUE)
1996                return rc;
1997
1998        ctxt->src.val = ctxt->_eip;
1999        rc = em_push(ctxt);
2000        if (rc != X86EMUL_CONTINUE)
2001                return rc;
2002
2003        ops->get_idt(ctxt, &dt);
2004
2005        eip_addr = dt.address + (irq << 2);
2006        cs_addr = dt.address + (irq << 2) + 2;
2007
2008        rc = linear_read_system(ctxt, cs_addr, &cs, 2);
2009        if (rc != X86EMUL_CONTINUE)
2010                return rc;
2011
2012        rc = linear_read_system(ctxt, eip_addr, &eip, 2);
2013        if (rc != X86EMUL_CONTINUE)
2014                return rc;
2015
2016        rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2017        if (rc != X86EMUL_CONTINUE)
2018                return rc;
2019
2020        ctxt->_eip = eip;
2021
2022        return rc;
2023}
2024
2025int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2026{
2027        int rc;
2028
2029        invalidate_registers(ctxt);
2030        rc = __emulate_int_real(ctxt, irq);
2031        if (rc == X86EMUL_CONTINUE)
2032                writeback_registers(ctxt);
2033        return rc;
2034}
2035
2036static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2037{
2038        switch(ctxt->mode) {
2039        case X86EMUL_MODE_REAL:
2040                return __emulate_int_real(ctxt, irq);
2041        case X86EMUL_MODE_VM86:
2042        case X86EMUL_MODE_PROT16:
2043        case X86EMUL_MODE_PROT32:
2044        case X86EMUL_MODE_PROT64:
2045        default:
2046                /* Protected mode interrupts are not yet implemented */
2047                return X86EMUL_UNHANDLEABLE;
2048        }
2049}
2050
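    /*
     * Real-mode IRET: pop IP, CS and FLAGS, reload CS, and merge the popped
     * flags according to the operand size.
     */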
2051static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2052{
2053        int rc = X86EMUL_CONTINUE;
2054        unsigned long temp_eip = 0;
2055        unsigned long temp_eflags = 0;
2056        unsigned long cs = 0;
2057        unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2058                             X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2059                             X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2060                             X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2061                             X86_EFLAGS_AC | X86_EFLAGS_ID |
2062                             X86_EFLAGS_FIXED;
2063        unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2064                                  X86_EFLAGS_VIP;
2065
2066        /* TODO: Add stack limit check */
2067
2068        rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2069
2070        if (rc != X86EMUL_CONTINUE)
2071                return rc;
2072
2073        if (temp_eip & ~0xffff)
2074                return emulate_gp(ctxt, 0);
2075
2076        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2077
2078        if (rc != X86EMUL_CONTINUE)
2079                return rc;
2080
2081        rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2082
2083        if (rc != X86EMUL_CONTINUE)
2084                return rc;
2085
2086        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2087
2088        if (rc != X86EMUL_CONTINUE)
2089                return rc;
2090
2091        ctxt->_eip = temp_eip;
2092
2093        if (ctxt->op_bytes == 4)
2094                ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2095        else if (ctxt->op_bytes == 2) {
2096                ctxt->eflags &= ~0xffff;
2097                ctxt->eflags |= temp_eflags;
2098        }
2099
2100        ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2101        ctxt->eflags |= X86_EFLAGS_FIXED;
2102        ctxt->ops->set_nmi_mask(ctxt, false);
2103
2104        return rc;
2105}
2106
2107static int em_iret(struct x86_emulate_ctxt *ctxt)
2108{
2109        switch(ctxt->mode) {
2110        case X86EMUL_MODE_REAL:
2111                return emulate_iret_real(ctxt);
2112        case X86EMUL_MODE_VM86:
2113        case X86EMUL_MODE_PROT16:
2114        case X86EMUL_MODE_PROT32:
2115        case X86EMUL_MODE_PROT64:
2116        default:
2117                /* IRET from protected mode is not yet implemented */
2118                return X86EMUL_UNHANDLEABLE;
2119        }
2120}
2121
2122static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2123{
2124        int rc;
2125        unsigned short sel;
2126        struct desc_struct new_desc;
2127        u8 cpl = ctxt->ops->cpl(ctxt);
2128
2129        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2130
2131        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2132                                       X86_TRANSFER_CALL_JMP,
2133                                       &new_desc);
2134        if (rc != X86EMUL_CONTINUE)
2135                return rc;
2136
2137        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2138        /* Error handling is not implemented. */
2139        if (rc != X86EMUL_CONTINUE)
2140                return X86EMUL_UNHANDLEABLE;
2141
2142        return rc;
2143}
2144
2145static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2146{
2147        return assign_eip_near(ctxt, ctxt->src.val);
2148}
2149
2150static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2151{
2152        int rc;
2153        long int old_eip;
2154
2155        old_eip = ctxt->_eip;
2156        rc = assign_eip_near(ctxt, ctxt->src.val);
2157        if (rc != X86EMUL_CONTINUE)
2158                return rc;
2159        ctxt->src.val = old_eip;
2160        rc = em_push(ctxt);
2161        return rc;
2162}
2163
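    /*
     * CMPXCHG8B: compare EDX:EAX with the 64-bit destination; on a match,
     * store ECX:EBX and set ZF, otherwise load EDX:EAX from the destination
     * and clear ZF.  (CMPXCHG16B is not handled here.)
     */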
2164static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2165{
2166        u64 old = ctxt->dst.orig_val64;
2167
2168        if (ctxt->dst.bytes == 16)
2169                return X86EMUL_UNHANDLEABLE;
2170
2171        if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2172            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2173                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2174                *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2175                ctxt->eflags &= ~X86_EFLAGS_ZF;
2176        } else {
2177                ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2178                        (u32) reg_read(ctxt, VCPU_REGS_RBX);
2179
2180                ctxt->eflags |= X86_EFLAGS_ZF;
2181        }
2182        return X86EMUL_CONTINUE;
2183}
2184
2185static int em_ret(struct x86_emulate_ctxt *ctxt)
2186{
2187        int rc;
2188        unsigned long eip;
2189
2190        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2191        if (rc != X86EMUL_CONTINUE)
2192                return rc;
2193
2194        return assign_eip_near(ctxt, eip);
2195}
2196
2197static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2198{
2199        int rc;
2200        unsigned long eip, cs;
2201        int cpl = ctxt->ops->cpl(ctxt);
2202        struct desc_struct new_desc;
2203
2204        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2205        if (rc != X86EMUL_CONTINUE)
2206                return rc;
2207        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2208        if (rc != X86EMUL_CONTINUE)
2209                return rc;
2210        /* Outer-privilege level return is not implemented */
2211        if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2212                return X86EMUL_UNHANDLEABLE;
2213        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2214                                       X86_TRANSFER_RET,
2215                                       &new_desc);
2216        if (rc != X86EMUL_CONTINUE)
2217                return rc;
2218        rc = assign_eip_far(ctxt, eip, &new_desc);
2219        /* Error handling is not implemented. */
2220        if (rc != X86EMUL_CONTINUE)
2221                return X86EMUL_UNHANDLEABLE;
2222
2223        return rc;
2224}
2225
2226static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2227{
2228        int rc;
2229
2230        rc = em_ret_far(ctxt);
2231        if (rc != X86EMUL_CONTINUE)
2232                return rc;
2233        rsp_increment(ctxt, ctxt->src.val);
2234        return X86EMUL_CONTINUE;
2235}
2236
2237static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2238{
2239        /* Save real source value, then compare EAX against destination. */
2240        ctxt->dst.orig_val = ctxt->dst.val;
2241        ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2242        ctxt->src.orig_val = ctxt->src.val;
2243        ctxt->src.val = ctxt->dst.orig_val;
2244        fastop(ctxt, em_cmp);
2245
2246        if (ctxt->eflags & X86_EFLAGS_ZF) {
2247                /* Success: write back to memory; no update of EAX */
2248                ctxt->src.type = OP_NONE;
2249                ctxt->dst.val = ctxt->src.orig_val;
2250        } else {
2251                /* Failure: write the value we saw to EAX. */
2252                ctxt->src.type = OP_REG;
2253                ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2254                ctxt->src.val = ctxt->dst.orig_val;
2255                /* Create write-cycle to dest by writing the same value */
2256                ctxt->dst.val = ctxt->dst.orig_val;
2257        }
2258        return X86EMUL_CONTINUE;
2259}
2260
2261static int em_lseg(struct x86_emulate_ctxt *ctxt)
2262{
2263        int seg = ctxt->src2.val;
2264        unsigned short sel;
2265        int rc;
2266
2267        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2268
2269        rc = load_segment_descriptor(ctxt, sel, seg);
2270        if (rc != X86EMUL_CONTINUE)
2271                return rc;
2272
2273        ctxt->dst.val = ctxt->src.val;
2274        return rc;
2275}
2276
2277static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2278{
2279#ifdef CONFIG_X86_64
2280        return ctxt->ops->guest_has_long_mode(ctxt);
2281#else
2282        return false;
2283#endif
2284}
2285
2286static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2287{
2288        desc->g    = (flags >> 23) & 1;
2289        desc->d    = (flags >> 22) & 1;
2290        desc->l    = (flags >> 21) & 1;
2291        desc->avl  = (flags >> 20) & 1;
2292        desc->p    = (flags >> 15) & 1;
2293        desc->dpl  = (flags >> 13) & 3;
2294        desc->s    = (flags >> 12) & 1;
2295        desc->type = (flags >>  8) & 15;
2296}
2297
2298static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
2299                           int n)
2300{
2301        struct desc_struct desc;
2302        int offset;
2303        u16 selector;
2304
2305        selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
2306
2307        if (n < 3)
2308                offset = 0x7f84 + n * 12;
2309        else
2310                offset = 0x7f2c + (n - 3) * 12;
2311
2312        set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
2313        set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
2314        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
2315        ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2316        return X86EMUL_CONTINUE;
2317}
2318
2319#ifdef CONFIG_X86_64
2320static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
2321                           int n)
2322{
2323        struct desc_struct desc;
2324        int offset;
2325        u16 selector;
2326        u32 base3;
2327
2328        offset = 0x7e00 + n * 16;
2329
2330        selector =                GET_SMSTATE(u16, smstate, offset);
2331        rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
2332        set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
2333        set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
2334        base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
2335
2336        ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2337        return X86EMUL_CONTINUE;
2338}
2339#endif
2340
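    /*
     * Restore CR0/CR3/CR4 from the SMM state-save area in an order the
     * hardware accepts; see the comments below for the PAE/PCIDE ordering.
     */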
2341static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2342                                    u64 cr0, u64 cr3, u64 cr4)
2343{
2344        int bad;
2345        u64 pcid;
2346
2347        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
2348        pcid = 0;
2349        if (cr4 & X86_CR4_PCIDE) {
2350                pcid = cr3 & 0xfff;
2351                cr3 &= ~0xfff;
2352        }
2353
2354        bad = ctxt->ops->set_cr(ctxt, 3, cr3);
2355        if (bad)
2356                return X86EMUL_UNHANDLEABLE;
2357
2358        /*
2359         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2360         * Then enable protected mode.  However, PCID cannot be enabled
2361         * if EFER.LMA=0, so set it separately.
2362         */
2363        bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2364        if (bad)
2365                return X86EMUL_UNHANDLEABLE;
2366
2367        bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2368        if (bad)
2369                return X86EMUL_UNHANDLEABLE;
2370
2371        if (cr4 & X86_CR4_PCIDE) {
2372                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2373                if (bad)
2374                        return X86EMUL_UNHANDLEABLE;
2375                if (pcid) {
2376                        bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
2377                        if (bad)
2378                                return X86EMUL_UNHANDLEABLE;
2379                }
2380
2381        }
2382
2383        return X86EMUL_CONTINUE;
2384}
2385
2386static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
2387                             const char *smstate)
2388{
2389        struct desc_struct desc;
2390        struct desc_ptr dt;
2391        u16 selector;
2392        u32 val, cr0, cr3, cr4;
2393        int i;
2394
2395        cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
2396        cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
2397        ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
2398        ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
2399
2400        for (i = 0; i < 8; i++)
2401                *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
2402
2403        val = GET_SMSTATE(u32, smstate, 0x7fcc);
2404
2405        if (ctxt->ops->set_dr(ctxt, 6, val))
2406                return X86EMUL_UNHANDLEABLE;
2407
2408        val = GET_SMSTATE(u32, smstate, 0x7fc8);
2409
2410        if (ctxt->ops->set_dr(ctxt, 7, val))
2411                return X86EMUL_UNHANDLEABLE;
2412
2413        selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
2414        set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
2415        set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
2416        rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
2417        ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2418
2419        selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
2420        set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
2421        set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
2422        rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
2423        ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2424
2425        dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
2426        dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
2427        ctxt->ops->set_gdt(ctxt, &dt);
2428
2429        dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
2430        dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
2431        ctxt->ops->set_idt(ctxt, &dt);
2432
2433        for (i = 0; i < 6; i++) {
2434                int r = rsm_load_seg_32(ctxt, smstate, i);
2435                if (r != X86EMUL_CONTINUE)
2436                        return r;
2437        }
2438
2439        cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
2440
2441        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
2442
2443        return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2444}
2445
2446#ifdef CONFIG_X86_64
2447static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
2448                             const char *smstate)
2449{
2450        struct desc_struct desc;
2451        struct desc_ptr dt;
2452        u64 val, cr0, cr3, cr4;
2453        u32 base3;
2454        u16 selector;
2455        int i, r;
2456
2457        for (i = 0; i < 16; i++)
2458                *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
2459
2460        ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
2461        ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
2462
2463        val = GET_SMSTATE(u64, smstate, 0x7f68);
2464
2465        if (ctxt->ops->set_dr(ctxt, 6, val))
2466                return X86EMUL_UNHANDLEABLE;
2467
2468        val = GET_SMSTATE(u64, smstate, 0x7f60);
2469
2470        if (ctxt->ops->set_dr(ctxt, 7, val))
2471                return X86EMUL_UNHANDLEABLE;
2472
2473        cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
2474        cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
2475        cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
2476        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
2477        val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
2478
2479        if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
2480                return X86EMUL_UNHANDLEABLE;
2481
2482        selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
2483        rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
2484        set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
2485        set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
2486        base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
2487        ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2488
2489        dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
2490        dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
2491        ctxt->ops->set_idt(ctxt, &dt);
2492
2493        selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
2494        rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
2495        set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
2496        set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
2497        base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
2498        ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2499
2500        dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
2501        dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
2502        ctxt->ops->set_gdt(ctxt, &dt);
2503
2504        r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
2505        if (r != X86EMUL_CONTINUE)
2506                return r;
2507
2508        for (i = 0; i < 6; i++) {
2509                r = rsm_load_seg_64(ctxt, smstate, i);
2510                if (r != X86EMUL_CONTINUE)
2511                        return r;
2512        }
2513
2514        return X86EMUL_CONTINUE;
2515}
2516#endif
2517
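    /*
     * RSM: leave System Management Mode.  Read the state-save area at
     * SMBASE + 0xfe00, drop back to a safe (real-mode-like) state so that
     * CR0/CR3/CR4/EFER can be reloaded, then restore the saved state.  Any
     * failure is turned into a shutdown (triple fault).
     */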
2518static int em_rsm(struct x86_emulate_ctxt *ctxt)
2519{
2520        unsigned long cr0, cr4, efer;
2521        char buf[512];
2522        u64 smbase;
2523        int ret;
2524
2525        if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
2526                return emulate_ud(ctxt);
2527
2528        smbase = ctxt->ops->get_smbase(ctxt);
2529
2530        ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
2531        if (ret != X86EMUL_CONTINUE)
2532                return X86EMUL_UNHANDLEABLE;
2533
2534        if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2535                ctxt->ops->set_nmi_mask(ctxt, false);
2536
2537        ctxt->ops->exiting_smm(ctxt);
2538
2539        /*
2540         * Get back to real mode, to prepare a safe state in which to load
2541         * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
2542         * supports long mode.
2543         */
2544        if (emulator_has_longmode(ctxt)) {
2545                struct desc_struct cs_desc;
2546
2547                /* Zero CR4.PCIDE before CR0.PG.  */
2548                cr4 = ctxt->ops->get_cr(ctxt, 4);
2549                if (cr4 & X86_CR4_PCIDE)
2550                        ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2551
2552                /* A 32-bit code segment is required to clear EFER.LMA.  */
2553                memset(&cs_desc, 0, sizeof(cs_desc));
2554                cs_desc.type = 0xb;
2555                cs_desc.s = cs_desc.g = cs_desc.p = 1;
2556                ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2557        }
2558
2559        /* For the 64-bit case, this will clear EFER.LMA.  */
2560        cr0 = ctxt->ops->get_cr(ctxt, 0);
2561        if (cr0 & X86_CR0_PE)
2562                ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2563
2564        if (emulator_has_longmode(ctxt)) {
2565                /* Clear CR4.PAE before clearing EFER.LME. */
2566                cr4 = ctxt->ops->get_cr(ctxt, 4);
2567                if (cr4 & X86_CR4_PAE)
2568                        ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2569
2570                /* And finally go back to 32-bit mode.  */
2571                efer = 0;
2572                ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2573        }
2574
2575        /*
2576         * Give leave_smm() a chance to make ISA-specific changes to the vCPU
2577         * state (e.g. enter guest mode) before loading state from the SMM
2578         * state-save area.
2579         */
2580        if (ctxt->ops->leave_smm(ctxt, buf))
2581                goto emulate_shutdown;
2582
2583#ifdef CONFIG_X86_64
2584        if (emulator_has_longmode(ctxt))
2585                ret = rsm_load_state_64(ctxt, buf);
2586        else
2587#endif
2588                ret = rsm_load_state_32(ctxt, buf);
2589
2590        if (ret != X86EMUL_CONTINUE)
2591                goto emulate_shutdown;
2592
2593        /*
2594         * Note, the ctxt->ops callbacks are responsible for handling side
2595         * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
2596         * runtime updates, etc...  If that changes, e.g. this flow is moved
2597         * out of the emulator to make it look more like enter_smm(), then
2598         * those side effects need to be explicitly handled for both success
2599         * and shutdown.
2600         */
2601        return X86EMUL_CONTINUE;
2602
2603emulate_shutdown:
2604        ctxt->ops->triple_fault(ctxt);
2605        return X86EMUL_CONTINUE;
2606}
2607
2608static void
2609setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2610                        struct desc_struct *cs, struct desc_struct *ss)
2611{
2612        cs->l = 0;              /* will be adjusted later */
2613        set_desc_base(cs, 0);   /* flat segment */
2614        cs->g = 1;              /* 4kb granularity */
2615        set_desc_limit(cs, 0xfffff);    /* 4GB limit */
2616        cs->type = 0x0b;        /* Read, Execute, Accessed */
2617        cs->s = 1;
2618        cs->dpl = 0;            /* will be adjusted later */
2619        cs->p = 1;
2620        cs->d = 1;
2621        cs->avl = 0;
2622
2623        set_desc_base(ss, 0);   /* flat segment */
2624        set_desc_limit(ss, 0xfffff);    /* 4GB limit */
2625        ss->g = 1;              /* 4kb granularity */
2626        ss->s = 1;
2627        ss->type = 0x03;        /* Read/Write, Accessed */
2628        ss->d = 1;              /* 32bit stack segment */
2629        ss->dpl = 0;
2630        ss->p = 1;
2631        ss->l = 0;
2632        ss->avl = 0;
2633}
2634
2635static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2636{
2637        u32 eax, ebx, ecx, edx;
2638
2639        eax = ecx = 0;
2640        ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2641        return is_guest_vendor_intel(ebx, ecx, edx);
2642}
2643
2644static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2645{
2646        const struct x86_emulate_ops *ops = ctxt->ops;
2647        u32 eax, ebx, ecx, edx;
2648
2649        /*
2650         * SYSCALL is always enabled in long mode, so the vendor-specific
2651         * (CPUID) check is only needed when another mode is active...
2652         */
2653        if (ctxt->mode == X86EMUL_MODE_PROT64)
2654                return true;
2655
2656        eax = 0x00000000;
2657        ecx = 0x00000000;
2658        ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
2659        /*
2660         * Note: Intel CPUs only support SYSCALL in 64-bit long mode, so a
2661         * 32-bit compat application in a 64-bit guest will #UD.  While this
2662         * behaviour could be emulated to match the AMD response, AMD CPUs
2663         * cannot be made to behave like Intel ones.
2664         */
2665        if (is_guest_vendor_intel(ebx, ecx, edx))
2666                return false;
2667
2668        if (is_guest_vendor_amd(ebx, ecx, edx) ||
2669            is_guest_vendor_hygon(ebx, ecx, edx))
2670                return true;
2671
2672        /*
2673         * default: (not Intel, not AMD, not Hygon), apply Intel's
2674         * stricter rules...
2675         */
2676        return false;
2677}
2678
2679static int em_syscall(struct x86_emulate_ctxt *ctxt)
2680{
2681        const struct x86_emulate_ops *ops = ctxt->ops;
2682        struct desc_struct cs, ss;
2683        u64 msr_data;
2684        u16 cs_sel, ss_sel;
2685        u64 efer = 0;
2686
2687        /* syscall is not available in real mode */
2688        if (ctxt->mode == X86EMUL_MODE_REAL ||
2689            ctxt->mode == X86EMUL_MODE_VM86)
2690                return emulate_ud(ctxt);
2691
2692        if (!(em_syscall_is_enabled(ctxt)))
2693                return emulate_ud(ctxt);
2694
2695        ops->get_msr(ctxt, MSR_EFER, &efer);
2696        if (!(efer & EFER_SCE))
2697                return emulate_ud(ctxt);
2698
2699        setup_syscalls_segments(ctxt, &cs, &ss);
2700        ops->get_msr(ctxt, MSR_STAR, &msr_data);
2701        msr_data >>= 32;
2702        cs_sel = (u16)(msr_data & 0xfffc);
2703        ss_sel = (u16)(msr_data + 8);
2704
2705        if (efer & EFER_LMA) {
2706                cs.d = 0;
2707                cs.l = 1;
2708        }
2709        ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2710        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2711
2712        *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2713        if (efer & EFER_LMA) {
2714#ifdef CONFIG_X86_64
2715                *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2716
2717                ops->get_msr(ctxt,
2718                             ctxt->mode == X86EMUL_MODE_PROT64 ?
2719                             MSR_LSTAR : MSR_CSTAR, &msr_data);
2720                ctxt->_eip = msr_data;
2721
2722                ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2723                ctxt->eflags &= ~msr_data;
2724                ctxt->eflags |= X86_EFLAGS_FIXED;
2725#endif
2726        } else {
2727                /* legacy mode */
2728                ops->get_msr(ctxt, MSR_STAR, &msr_data);
2729                ctxt->_eip = (u32)msr_data;
2730
2731                ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2732        }
2733
2734        ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
2735        return X86EMUL_CONTINUE;
2736}
2737
2738static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2739{
2740        const struct x86_emulate_ops *ops = ctxt->ops;
2741        struct desc_struct cs, ss;
2742        u64 msr_data;
2743        u16 cs_sel, ss_sel;
2744        u64 efer = 0;
2745
2746        ops->get_msr(ctxt, MSR_EFER, &efer);
2747        /* inject #GP if in real mode */
2748        if (ctxt->mode == X86EMUL_MODE_REAL)
2749                return emulate_gp(ctxt, 0);
2750
2751        /*
2752         * Not recognized on AMD in compat mode (but is recognized in legacy
2753         * mode).
2754         */
2755        if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2756            && !vendor_intel(ctxt))
2757                return emulate_ud(ctxt);
2758
2759        /* sysenter/sysexit have not been tested in 64bit mode. */
2760        if (ctxt->mode == X86EMUL_MODE_PROT64)
2761                return X86EMUL_UNHANDLEABLE;
2762
2763        ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2764        if ((msr_data & 0xfffc) == 0x0)
2765                return emulate_gp(ctxt, 0);
2766
2767        setup_syscalls_segments(ctxt, &cs, &ss);
2768        ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2769        cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2770        ss_sel = cs_sel + 8;
2771        if (efer & EFER_LMA) {
2772                cs.d = 0;
2773                cs.l = 1;
2774        }
2775
2776        ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2777        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2778
2779        ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2780        ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2781
2782        ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2783        *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2784                                                              (u32)msr_data;
2785        if (efer & EFER_LMA)
2786                ctxt->mode = X86EMUL_MODE_PROT64;
2787
2788        return X86EMUL_CONTINUE;
2789}
2790
2791static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2792{
2793        const struct x86_emulate_ops *ops = ctxt->ops;
2794        struct desc_struct cs, ss;
2795        u64 msr_data, rcx, rdx;
2796        int usermode;
2797        u16 cs_sel = 0, ss_sel = 0;
2798
2799        /* inject #GP if in real mode or Virtual 8086 mode */
2800        if (ctxt->mode == X86EMUL_MODE_REAL ||
2801            ctxt->mode == X86EMUL_MODE_VM86)
2802                return emulate_gp(ctxt, 0);
2803
2804        setup_syscalls_segments(ctxt, &cs, &ss);
2805
2806        if ((ctxt->rex_prefix & 0x8) != 0x0)
2807                usermode = X86EMUL_MODE_PROT64;
2808        else
2809                usermode = X86EMUL_MODE_PROT32;
2810
2811        rcx = reg_read(ctxt, VCPU_REGS_RCX);
2812        rdx = reg_read(ctxt, VCPU_REGS_RDX);
2813
2814        cs.dpl = 3;
2815        ss.dpl = 3;
2816        ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2817        switch (usermode) {
2818        case X86EMUL_MODE_PROT32:
2819                cs_sel = (u16)(msr_data + 16);
2820                if ((msr_data & 0xfffc) == 0x0)
2821                        return emulate_gp(ctxt, 0);
2822                ss_sel = (u16)(msr_data + 24);
2823                rcx = (u32)rcx;
2824                rdx = (u32)rdx;
2825                break;
2826        case X86EMUL_MODE_PROT64:
2827                cs_sel = (u16)(msr_data + 32);
2828                if (msr_data == 0x0)
2829                        return emulate_gp(ctxt, 0);
2830                ss_sel = cs_sel + 8;
2831                cs.d = 0;
2832                cs.l = 1;
2833                if (emul_is_noncanonical_address(rcx, ctxt) ||
2834                    emul_is_noncanonical_address(rdx, ctxt))
2835                        return emulate_gp(ctxt, 0);
2836                break;
2837        }
2838        cs_sel |= SEGMENT_RPL_MASK;
2839        ss_sel |= SEGMENT_RPL_MASK;
2840
2841        ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2842        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2843
2844        ctxt->_eip = rdx;
2845        *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2846
2847        return X86EMUL_CONTINUE;
2848}
2849
2850static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2851{
2852        int iopl;
2853        if (ctxt->mode == X86EMUL_MODE_REAL)
2854                return false;
2855        if (ctxt->mode == X86EMUL_MODE_VM86)
2856                return true;
2857        iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2858        return ctxt->ops->cpl(ctxt) > iopl;
2859}
2860
2861#define VMWARE_PORT_VMPORT      (0x5658)
2862#define VMWARE_PORT_VMRPC       (0x5659)
2863
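    /*
     * Check the TSS I/O permission bitmap for an access of @len bytes at
     * @port.  The VMware backdoor ports are always allowed when the
     * backdoor is enabled.
     */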
2864static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2865                                            u16 port, u16 len)
2866{
2867        const struct x86_emulate_ops *ops = ctxt->ops;
2868        struct desc_struct tr_seg;
2869        u32 base3;
2870        int r;
2871        u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2872        unsigned mask = (1 << len) - 1;
2873        unsigned long base;
2874
2875        /*
2876         * VMware allows access to these ports even if denied
2877         * by TSS I/O permission bitmap. Mimic behavior.
2878         */
2879        if (enable_vmware_backdoor &&
2880            ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2881                return true;
2882
2883        ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2884        if (!tr_seg.p)
2885                return false;
2886        if (desc_limit_scaled(&tr_seg) < 103)
2887                return false;
2888        base = get_desc_base(&tr_seg);
2889#ifdef CONFIG_X86_64
2890        base |= ((u64)base3) << 32;
2891#endif
2892        r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2893        if (r != X86EMUL_CONTINUE)
2894                return false;
2895        if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2896                return false;
2897        r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2898        if (r != X86EMUL_CONTINUE)
2899                return false;
2900        if ((perm >> bit_idx) & mask)
2901                return false;
2902        return true;
2903}
2904
2905static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2906                                 u16 port, u16 len)
2907{
2908        if (ctxt->perm_ok)
2909                return true;
2910
2911        if (emulator_bad_iopl(ctxt))
2912                if (!emulator_io_port_access_allowed(ctxt, port, len))
2913                        return false;
2914
2915        ctxt->perm_ok = true;
2916
2917        return true;
2918}
2919
2920static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2921{
2922        /*
2923         * Intel CPUs mask the counter and the pointers in a rather strange
2924         * manner when ECX is zero, due to REP-string optimizations.
2925         */
2926#ifdef CONFIG_X86_64
2927        if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2928                return;
2929
2930        *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2931
2932        switch (ctxt->b) {
2933        case 0xa4:      /* movsb */
2934        case 0xa5:      /* movsd/w */
2935                *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2936                fallthrough;
2937        case 0xaa:      /* stosb */
2938        case 0xab:      /* stosd/w */
2939                *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2940        }
2941#endif
2942}
2943
2944static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2945                                struct tss_segment_16 *tss)
2946{
2947        tss->ip = ctxt->_eip;
2948        tss->flag = ctxt->eflags;
2949        tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2950        tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2951        tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2952        tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2953        tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2954        tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2955        tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2956        tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2957
2958        tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2959        tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2960        tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2961        tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2962        tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2963}
2964
2965static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2966                                 struct tss_segment_16 *tss)
2967{
2968        int ret;
2969        u8 cpl;
2970
2971        ctxt->_eip = tss->ip;
2972        ctxt->eflags = tss->flag | 2;
2973        *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2974        *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2975        *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2976        *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2977        *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2978        *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2979        *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2980        *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2981
2982        /*
2983         * The SDM says that segment selectors are loaded before segment
2984         * descriptors.
2985         */
2986        set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2987        set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2988        set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2989        set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2990        set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2991
2992        cpl = tss->cs & 3;
2993
2994        /*
2995         * Now load the segment descriptors. If a fault happens at this
2996         * stage, it is handled in the context of the new task.
2997         */
2998        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2999                                        X86_TRANSFER_TASK_SWITCH, NULL);
3000        if (ret != X86EMUL_CONTINUE)
3001                return ret;
3002        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3003                                        X86_TRANSFER_TASK_SWITCH, NULL);
3004        if (ret != X86EMUL_CONTINUE)
3005                return ret;
3006        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3007                                        X86_TRANSFER_TASK_SWITCH, NULL);
3008        if (ret != X86EMUL_CONTINUE)
3009                return ret;
3010        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3011                                        X86_TRANSFER_TASK_SWITCH, NULL);
3012        if (ret != X86EMUL_CONTINUE)
3013                return ret;
3014        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3015                                        X86_TRANSFER_TASK_SWITCH, NULL);
3016        if (ret != X86EMUL_CONTINUE)
3017                return ret;
3018
3019        return X86EMUL_CONTINUE;
3020}
3021
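    /*
     * 16-bit task switch: save the current state into the old TSS, read the
     * new TSS, optionally link it back to the old task, and load the new
     * state.
     */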
3022static int task_switch_16(struct x86_emulate_ctxt *ctxt,
3023                          u16 tss_selector, u16 old_tss_sel,
3024                          ulong old_tss_base, struct desc_struct *new_desc)
3025{
3026        struct tss_segment_16 tss_seg;
3027        int ret;
3028        u32 new_tss_base = get_desc_base(new_desc);
3029
3030        ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3031        if (ret != X86EMUL_CONTINUE)
3032                return ret;
3033
3034        save_state_to_tss16(ctxt, &tss_seg);
3035
3036        ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3037        if (ret != X86EMUL_CONTINUE)
3038                return ret;
3039
3040        ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3041        if (ret != X86EMUL_CONTINUE)
3042                return ret;
3043
3044        if (old_tss_sel != 0xffff) {
3045                tss_seg.prev_task_link = old_tss_sel;
3046
3047                ret = linear_write_system(ctxt, new_tss_base,
3048                                          &tss_seg.prev_task_link,
3049                                          sizeof(tss_seg.prev_task_link));
3050                if (ret != X86EMUL_CONTINUE)
3051                        return ret;
3052        }
3053
3054        return load_state_from_tss16(ctxt, &tss_seg);
3055}
3056
3057static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3058                                struct tss_segment_32 *tss)
3059{
3060        /* CR3 and the LDT selector are intentionally not saved */
3061        tss->eip = ctxt->_eip;
3062        tss->eflags = ctxt->eflags;
3063        tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3064        tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3065        tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3066        tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3067        tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3068        tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3069        tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3070        tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3071
3072        tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3073        tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3074        tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3075        tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3076        tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3077        tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3078}
3079
3080static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3081                                 struct tss_segment_32 *tss)
3082{
3083        int ret;
3084        u8 cpl;
3085
3086        if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3087                return emulate_gp(ctxt, 0);
3088        ctxt->_eip = tss->eip;
3089        ctxt->eflags = tss->eflags | 2;
3090
3091        /* General purpose registers */
3092        *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3093        *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3094        *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3095        *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3096        *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3097        *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3098        *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3099        *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3100
3101        /*
3102         * SDM says that segment selectors are loaded before segment
3103         * descriptors.  This is important because CPL checks will
3104         * use CS.RPL.
3105         */
3106        set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3107        set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3108        set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3109        set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3110        set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3111        set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3112        set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3113
3114        /*
3115         * If we're switching between Protected Mode and VM86, we need to make
3116         * sure to update the mode before loading the segment descriptors so
3117         * that the selectors are interpreted correctly.
3118         */
3119        if (ctxt->eflags & X86_EFLAGS_VM) {
3120                ctxt->mode = X86EMUL_MODE_VM86;
3121                cpl = 3;
3122        } else {
3123                ctxt->mode = X86EMUL_MODE_PROT32;
3124                cpl = tss->cs & 3;
3125        }
3126
3127        /*
3128         * Now load the segment descriptors. If a fault happens at this
3129         * stage, it is handled in the context of the new task.
3130         */
3131        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3132                                        cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3133        if (ret != X86EMUL_CONTINUE)
3134                return ret;
3135        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3136                                        X86_TRANSFER_TASK_SWITCH, NULL);
3137        if (ret != X86EMUL_CONTINUE)
3138                return ret;
3139        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3140                                        X86_TRANSFER_TASK_SWITCH, NULL);
3141        if (ret != X86EMUL_CONTINUE)
3142                return ret;
3143        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3144                                        X86_TRANSFER_TASK_SWITCH, NULL);
3145        if (ret != X86EMUL_CONTINUE)
3146                return ret;
3147        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3148                                        X86_TRANSFER_TASK_SWITCH, NULL);
3149        if (ret != X86EMUL_CONTINUE)
3150                return ret;
3151        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3152                                        X86_TRANSFER_TASK_SWITCH, NULL);
3153        if (ret != X86EMUL_CONTINUE)
3154                return ret;
3155        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3156                                        X86_TRANSFER_TASK_SWITCH, NULL);
3157
3158        return ret;
3159}
3160
3161static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3162                          u16 tss_selector, u16 old_tss_sel,
3163                          ulong old_tss_base, struct desc_struct *new_desc)
3164{
3165        struct tss_segment_32 tss_seg;
3166        int ret;
3167        u32 new_tss_base = get_desc_base(new_desc);
3168        u32 eip_offset = offsetof(struct tss_segment_32, eip);
3169        u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3170
3171        ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
3172        if (ret != X86EMUL_CONTINUE)
3173                return ret;
3174
3175        save_state_to_tss32(ctxt, &tss_seg);
3176
3177        /* Only GP registers and segment selectors are saved */
3178        ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3179                                  ldt_sel_offset - eip_offset);
3180        if (ret != X86EMUL_CONTINUE)
3181                return ret;
3182
3183        ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
3184        if (ret != X86EMUL_CONTINUE)
3185                return ret;
3186
3187        if (old_tss_sel != 0xffff) {
3188                tss_seg.prev_task_link = old_tss_sel;
3189
3190                ret = linear_write_system(ctxt, new_tss_base,
3191                                          &tss_seg.prev_task_link,
3192                                          sizeof(tss_seg.prev_task_link));
3193                if (ret != X86EMUL_CONTINUE)
3194                        return ret;
3195        }
3196
3197        return load_state_from_tss32(ctxt, &tss_seg);
3198}
3199
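/*
 * Core of the emulated task switch: read the old and new TSS descriptors,
 * apply the privilege check for task-gate transfers, validate the new TSS
 * limit and present bit, maintain the busy bits and EFLAGS.NT, perform the
 * 16- or 32-bit switch, set CR0.TS, load TR, push the error code when one
 * is supplied, and clear the local breakpoint enables in DR7.
 */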
3200static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3201                                   u16 tss_selector, int idt_index, int reason,
3202                                   bool has_error_code, u32 error_code)
3203{
3204        const struct x86_emulate_ops *ops = ctxt->ops;
3205        struct desc_struct curr_tss_desc, next_tss_desc;
3206        int ret;
3207        u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3208        ulong old_tss_base =
3209                ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3210        u32 desc_limit;
3211        ulong desc_addr, dr7;
3212
3213        /* FIXME: old_tss_base == ~0 ? */
3214
3215        ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3216        if (ret != X86EMUL_CONTINUE)
3217                return ret;
3218        ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3219        if (ret != X86EMUL_CONTINUE)
3220                return ret;
3221
3222        /* FIXME: check that next_tss_desc is tss */
3223
3224        /*
3225         * Check privileges. The three cases are task switch caused by...
3226         *
3227         * 1. jmp/call/int to task gate: Check against DPL of the task gate
3228         * 2. Exception/IRQ/iret: No check is performed
3229         * 3. jmp/call to TSS/task-gate: No check is performed since the
3230         *    hardware checks it before exiting.
3231         */
3232        if (reason == TASK_SWITCH_GATE) {
3233                if (idt_index != -1) {
3234                        /* Software interrupts */
3235                        struct desc_struct task_gate_desc;
3236                        int dpl;
3237
3238                        ret = read_interrupt_descriptor(ctxt, idt_index,
3239                                                        &task_gate_desc);
3240                        if (ret != X86EMUL_CONTINUE)
3241                                return ret;
3242
3243                        dpl = task_gate_desc.dpl;
3244                        if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3245                                return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3246                }
3247        }
3248
3249        desc_limit = desc_limit_scaled(&next_tss_desc);
3250        if (!next_tss_desc.p ||
3251            ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3252             desc_limit < 0x2b)) {
3253                return emulate_ts(ctxt, tss_selector & 0xfffc);
3254        }
3255
3256        if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3257                curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3258                write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3259        }
3260
3261        if (reason == TASK_SWITCH_IRET)
3262                ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3263
3264        /* Set the back link to the previous task only if the NT bit is set
3265           in EFLAGS; note that old_tss_sel is not used after this point. */
3266        if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3267                old_tss_sel = 0xffff;
3268
3269        if (next_tss_desc.type & 8)
3270                ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3271                                     old_tss_base, &next_tss_desc);
3272        else
3273                ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3274                                     old_tss_base, &next_tss_desc);
3275        if (ret != X86EMUL_CONTINUE)
3276                return ret;
3277
3278        if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3279                ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3280
3281        if (reason != TASK_SWITCH_IRET) {
3282                next_tss_desc.type |= (1 << 1); /* set busy flag */
3283                write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3284        }
3285
3286        ops->set_cr(ctxt, 0,  ops->get_cr(ctxt, 0) | X86_CR0_TS);
3287        ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3288
3289        if (has_error_code) {
3290                ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3291                ctxt->lock_prefix = 0;
3292                ctxt->src.val = (unsigned long) error_code;
3293                ret = em_push(ctxt);
3294        }
3295
3296        ops->get_dr(ctxt, 7, &dr7);
3297        ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3298
3299        return ret;
3300}
3301
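/*
 * Entry point used by the rest of KVM: register state is refreshed before
 * the switch and written back (along with the new RIP) only if it succeeds;
 * an unhandleable switch is reported as EMULATION_FAILED.
 */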
3302int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3303                         u16 tss_selector, int idt_index, int reason,
3304                         bool has_error_code, u32 error_code)
3305{
3306        int rc;
3307
3308        invalidate_registers(ctxt);
3309        ctxt->_eip = ctxt->eip;
3310        ctxt->dst.type = OP_NONE;
3311
3312        rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3313                                     has_error_code, error_code);
3314
3315        if (rc == X86EMUL_CONTINUE) {
3316                ctxt->eip = ctxt->_eip;
3317                writeback_registers(ctxt);
3318        }
3319
3320        return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3321}
3322
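/*
 * Advance a string-operation index register (SI/DI) by the element size,
 * moving backwards when EFLAGS.DF is set, and refresh the operand's cached
 * effective address.
 */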
3323static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3324                struct operand *op)
3325{
3326        int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3327
3328        register_address_increment(ctxt, reg, df * op->bytes);
3329        op->addr.mem.ea = register_address(ctxt, reg);
3330}
3331
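/*
 * DAS (decimal adjust AL after subtraction, packed BCD): if the low nibble
 * of AL is greater than 9 or AF is set, subtract 6 from AL and set AF; if
 * the original AL was above 0x99 or CF was set, subtract 0x60 and set CF.
 */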
3332static int em_das(struct x86_emulate_ctxt *ctxt)
3333{
3334        u8 al, old_al;
3335        bool af, cf, old_cf;
3336
3337        cf = ctxt->eflags & X86_EFLAGS_CF;
3338        al = ctxt->dst.val;
3339
3340        old_al = al;
3341        old_cf = cf;
3342        cf = false;
3343        af = ctxt->eflags & X86_EFLAGS_AF;
3344        if ((al & 0x0f) > 9 || af) {
3345                al -= 6;
3346                cf = old_cf | (al >= 250);
3347                af = true;
3348        } else {
3349                af = false;
3350        }
3351        if (old_al > 0x99 || old_cf) {
3352                al -= 0x60;
3353                cf = true;
3354        }
3355
3356        ctxt->dst.val = al;
3357        /* Set PF, ZF, SF */
3358        ctxt->src.type = OP_IMM;
3359        ctxt->src.val = 0;
3360        ctxt->src.bytes = 1;
3361        fastop(ctxt, em_or);
3362        ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3363        if (cf)
3364                ctxt->eflags |= X86_EFLAGS_CF;
3365        if (af)
3366                ctxt->eflags |= X86_EFLAGS_AF;
3367        return X86EMUL_CONTINUE;
3368}
3369
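/*
 * AAM imm8: AH = AL / imm8, AL = AL % imm8 (the immediate is normally 10);
 * a zero immediate raises #DE.  PF/ZF/SF are then set from AL.
 */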
3370static int em_aam(struct x86_emulate_ctxt *ctxt)
3371{
3372        u8 al, ah;
3373
3374        if (ctxt->src.val == 0)
3375                return emulate_de(ctxt);
3376
3377        al = ctxt->dst.val & 0xff;
3378        ah = al / ctxt->src.val;
3379        al %= ctxt->src.val;
3380
3381        ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3382
3383        /* Set PF, ZF, SF */
3384        ctxt->src.type = OP_IMM;
3385        ctxt->src.val = 0;
3386        ctxt->src.bytes = 1;
3387        fastop(ctxt, em_or);
3388
3389        return X86EMUL_CONTINUE;
3390}
3391
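/*
 * AAD imm8: AL = (AL + AH * imm8) & 0xff, AH = 0, then PF/ZF/SF are set
 * from the result.
 */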
3392static int em_aad(struct x86_emulate_ctxt *ctxt)
3393{
3394        u8 al = ctxt->dst.val & 0xff;
3395        u8 ah = (ctxt->dst.val >> 8) & 0xff;
3396
3397        al = (al + (ah * ctxt->src.val)) & 0xff;
3398
3399        ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3400
3401        /* Set PF, ZF, SF */
3402        ctxt->src.type = OP_IMM;
3403        ctxt->src.val = 0;
3404        ctxt->src.bytes = 1;
3405        fastop(ctxt, em_or);
3406
3407        return X86EMUL_CONTINUE;
3408}
3409
3410static int em_call(struct x86_emulate_ctxt *ctxt)
3411{
3412        int rc;
3413        long rel = ctxt->src.val;
3414
3415        ctxt->src.val = (unsigned long)ctxt->_eip;
3416        rc = jmp_rel(ctxt, rel);
3417        if (rc != X86EMUL_CONTINUE)
3418                return rc;
3419        return em_push(ctxt);
3420}
3421
3422static int em_call_far(struct x86_emulate_ctxt *ctxt)
3423{
3424        u16 sel, old_cs;
3425        ulong old_eip;
3426        int rc;
3427        struct desc_struct old_desc, new_desc;
3428        const struct x86_emulate_ops *ops = ctxt->ops;
3429        int cpl = ctxt->ops->cpl(ctxt);
3430        enum x86emul_mode prev_mode = ctxt->mode;
3431
3432        old_eip = ctxt->_eip;
3433        ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3434
3435        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3436        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3437                                       X86_TRANSFER_CALL_JMP, &new_desc);
3438        if (rc != X86EMUL_CONTINUE)
3439                return rc;
3440
3441        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3442        if (rc != X86EMUL_CONTINUE)
3443                goto fail;
3444
3445        ctxt->src.val = old_cs;
3446        rc = em_push(ctxt);
3447        if (rc != X86EMUL_CONTINUE)
3448                goto fail;
3449
3450        ctxt->src.val = old_eip;
3451        rc = em_push(ctxt);
3452        /* If we failed, we tainted the memory, but at the very least we
3453           should restore CS */
3454        if (rc != X86EMUL_CONTINUE) {
3455                pr_warn_once("faulting far call emulation tainted memory\n");
3456                goto fail;
3457        }
3458        return rc;
3459fail:
3460        ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3461        ctxt->mode = prev_mode;
3462        return rc;
3463
3464}
3465
3466static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3467{
3468        int rc;
3469        unsigned long eip;
3470
3471        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3472        if (rc != X86EMUL_CONTINUE)
3473                return rc;
3474        rc = assign_eip_near(ctxt, eip);
3475        if (rc != X86EMUL_CONTINUE)
3476                return rc;
3477        rsp_increment(ctxt, ctxt->src.val);
3478        return X86EMUL_CONTINUE;
3479}
3480
3481static int em_xchg(struct x86_emulate_ctxt *ctxt)
3482{
3483        /* Write back the register source. */
3484        ctxt->src.val = ctxt->dst.val;
3485        write_register_operand(&ctxt->src);
3486
3487        /* Write back the memory destination with implicit LOCK prefix. */
3488        ctxt->dst.val = ctxt->src.orig_val;
3489        ctxt->lock_prefix = 1;
3490        return X86EMUL_CONTINUE;
3491}
3492
3493static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3494{
3495        ctxt->dst.val = ctxt->src2.val;
3496        return fastop(ctxt, em_imul);
3497}
3498
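/*
 * CWD/CDQ/CQO: sign-extend the accumulator into DX/EDX/RDX.  The expression
 * below yields all ones when the sign bit of the source is set and zero
 * otherwise.
 */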
3499static int em_cwd(struct x86_emulate_ctxt *ctxt)
3500{
3501        ctxt->dst.type = OP_REG;
3502        ctxt->dst.bytes = ctxt->src.bytes;
3503        ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3504        ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3505
3506        return X86EMUL_CONTINUE;
3507}
3508
3509static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3510{
3511        u64 tsc_aux = 0;
3512
3513        if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
3514                return emulate_ud(ctxt);
3515        ctxt->dst.val = tsc_aux;
3516        return X86EMUL_CONTINUE;
3517}
3518
3519static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3520{
3521        u64 tsc = 0;
3522
3523        ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3524        *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3525        *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3526        return X86EMUL_CONTINUE;
3527}
3528
3529static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3530{
3531        u64 pmc;
3532
3533        if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3534                return emulate_gp(ctxt, 0);
3535        *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3536        *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3537        return X86EMUL_CONTINUE;
3538}
3539
3540static int em_mov(struct x86_emulate_ctxt *ctxt)
3541{
3542        memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3543        return X86EMUL_CONTINUE;
3544}
3545
3546static int em_movbe(struct x86_emulate_ctxt *ctxt)
3547{
3548        u16 tmp;
3549
3550        if (!ctxt->ops->guest_has_movbe(ctxt))
3551                return emulate_ud(ctxt);
3552
3553        switch (ctxt->op_bytes) {
3554        case 2:
3555                /*
3556                 * From MOVBE definition: "...When the operand size is 16 bits,
3557                 * the upper word of the destination register remains unchanged
3558                 * ..."
3559                 *
3560                 * Casting either ->valptr or ->val to u16 breaks strict aliasing
3561                 * rules, so we have to do the operation almost by hand.
3562                 */
3563                tmp = (u16)ctxt->src.val;
3564                ctxt->dst.val &= ~0xffffUL;
3565                ctxt->dst.val |= (unsigned long)swab16(tmp);
3566                break;
3567        case 4:
3568                ctxt->dst.val = swab32((u32)ctxt->src.val);
3569                break;
3570        case 8:
3571                ctxt->dst.val = swab64(ctxt->src.val);
3572                break;
3573        default:
3574                BUG();
3575        }
3576        return X86EMUL_CONTINUE;
3577}
3578
3579static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3580{
3581        if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3582                return emulate_gp(ctxt, 0);
3583
3584        /* Disable writeback. */
3585        ctxt->dst.type = OP_NONE;
3586        return X86EMUL_CONTINUE;
3587}
3588
3589static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3590{
3591        unsigned long val;
3592
3593        if (ctxt->mode == X86EMUL_MODE_PROT64)
3594                val = ctxt->src.val & ~0ULL;
3595        else
3596                val = ctxt->src.val & ~0U;
3597
3598        /* #UD condition is already handled. */
3599        if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3600                return emulate_gp(ctxt, 0);
3601
3602        /* Disable writeback. */
3603        ctxt->dst.type = OP_NONE;
3604        return X86EMUL_CONTINUE;
3605}
3606
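/*
 * WRMSR: ECX selects the MSR, EDX:EAX supplies the 64-bit value.  A rejected
 * write raises #GP(0); a negative return from ->set_msr() means the access
 * could not be handled by the emulator at all.
 */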
3607static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3608{
3609        u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3610        u64 msr_data;
3611        int r;
3612
3613        msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3614                | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3615        r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);
3616
3617        if (r == X86EMUL_IO_NEEDED)
3618                return r;
3619
3620        if (r > 0)
3621                return emulate_gp(ctxt, 0);
3622
3623        return r < 0 ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
3624}
3625
3626static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3627{
3628        u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3629        u64 msr_data;
3630        int r;
3631
3632        r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data);
3633
3634        if (r == X86EMUL_IO_NEEDED)
3635                return r;
3636
3637        if (r)
3638                return emulate_gp(ctxt, 0);
3639
3640        *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3641        *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3642        return X86EMUL_CONTINUE;
3643}
3644
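/*
 * Store a segment or system register selector.  With CR4.UMIP set, the
 * system-register stores (SLDT/STR, i.e. segment indices beyond GS) fault
 * with #GP(0) at CPL > 0, and stores to memory are truncated to 16 bits.
 */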
3645static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3646{
3647        if (segment > VCPU_SREG_GS &&
3648            (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3649            ctxt->ops->cpl(ctxt) > 0)
3650                return emulate_gp(ctxt, 0);
3651
3652        ctxt->dst.val = get_segment_selector(ctxt, segment);
3653        if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3654                ctxt->dst.bytes = 2;
3655        return X86EMUL_CONTINUE;
3656}
3657
3658static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3659{
3660        if (ctxt->modrm_reg > VCPU_SREG_GS)
3661                return emulate_ud(ctxt);
3662
3663        return em_store_sreg(ctxt, ctxt->modrm_reg);
3664}
3665
3666static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3667{
3668        u16 sel = ctxt->src.val;
3669
3670        if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3671                return emulate_ud(ctxt);
3672
3673        if (ctxt->modrm_reg == VCPU_SREG_SS)
3674                ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3675
3676        /* Disable writeback. */
3677        ctxt->dst.type = OP_NONE;
3678        return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3679}
3680
3681static int em_sldt(struct x86_emulate_ctxt *ctxt)
3682{
3683        return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3684}
3685
3686static int em_lldt(struct x86_emulate_ctxt *ctxt)
3687{
3688        u16 sel = ctxt->src.val;
3689
3690        /* Disable writeback. */
3691        ctxt->dst.type = OP_NONE;
3692        return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3693}
3694
3695static int em_str(struct x86_emulate_ctxt *ctxt)
3696{
3697        return em_store_sreg(ctxt, VCPU_SREG_TR);
3698}
3699
3700static int em_ltr(struct x86_emulate_ctxt *ctxt)
3701{
3702        u16 sel = ctxt->src.val;
3703
3704        /* Disable writeback. */
3705        ctxt->dst.type = OP_NONE;
3706        return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3707}
3708
3709static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3710{
3711        int rc;
3712        ulong linear;
3713
3714        rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3715        if (rc == X86EMUL_CONTINUE)
3716                ctxt->ops->invlpg(ctxt, linear);
3717        /* Disable writeback. */
3718        ctxt->dst.type = OP_NONE;
3719        return X86EMUL_CONTINUE;
3720}
3721
3722static int em_clts(struct x86_emulate_ctxt *ctxt)
3723{
3724        ulong cr0;
3725
3726        cr0 = ctxt->ops->get_cr(ctxt, 0);
3727        cr0 &= ~X86_CR0_TS;
3728        ctxt->ops->set_cr(ctxt, 0, cr0);
3729        return X86EMUL_CONTINUE;
3730}
3731
3732static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3733{
3734        int rc = ctxt->ops->fix_hypercall(ctxt);
3735
3736        if (rc != X86EMUL_CONTINUE)
3737                return rc;
3738
3739        /* Let the processor re-execute the fixed hypercall */
3740        ctxt->_eip = ctxt->eip;
3741        /* Disable writeback. */
3742        ctxt->dst.type = OP_NONE;
3743        return X86EMUL_CONTINUE;
3744}
3745
3746static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3747                                  void (*get)(struct x86_emulate_ctxt *ctxt,
3748                                              struct desc_ptr *ptr))
3749{
3750        struct desc_ptr desc_ptr;
3751
3752        if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3753            ctxt->ops->cpl(ctxt) > 0)
3754                return emulate_gp(ctxt, 0);
3755
3756        if (ctxt->mode == X86EMUL_MODE_PROT64)
3757                ctxt->op_bytes = 8;
3758        get(ctxt, &desc_ptr);
3759        if (ctxt->op_bytes == 2) {
3760                ctxt->op_bytes = 4;
3761                desc_ptr.address &= 0x00ffffff;
3762        }
3763        /* Disable writeback. */
3764        ctxt->dst.type = OP_NONE;
3765        return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3766                                   &desc_ptr, 2 + ctxt->op_bytes);
3767}
3768
3769static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3770{
3771        return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3772}
3773
3774static int em_sidt(struct x86_emulate_ctxt *ctxt)
3775{
3776        return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3777}
3778
3779static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3780{
3781        struct desc_ptr desc_ptr;
3782        int rc;
3783
3784        if (ctxt->mode == X86EMUL_MODE_PROT64)
3785                ctxt->op_bytes = 8;
3786        rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3787                             &desc_ptr.size, &desc_ptr.address,
3788                             ctxt->op_bytes);
3789        if (rc != X86EMUL_CONTINUE)
3790                return rc;
3791        if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3792            emul_is_noncanonical_address(desc_ptr.address, ctxt))
3793                return emulate_gp(ctxt, 0);
3794        if (lgdt)
3795                ctxt->ops->set_gdt(ctxt, &desc_ptr);
3796        else
3797                ctxt->ops->set_idt(ctxt, &desc_ptr);
3798        /* Disable writeback. */
3799        ctxt->dst.type = OP_NONE;
3800        return X86EMUL_CONTINUE;
3801}
3802
3803static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3804{
3805        return em_lgdt_lidt(ctxt, true);
3806}
3807
3808static int em_lidt(struct x86_emulate_ctxt *ctxt)
3809{
3810        return em_lgdt_lidt(ctxt, false);
3811}
3812
3813static int em_smsw(struct x86_emulate_ctxt *ctxt)
3814{
3815        if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3816            ctxt->ops->cpl(ctxt) > 0)
3817                return emulate_gp(ctxt, 0);
3818
3819        if (ctxt->dst.type == OP_MEM)
3820                ctxt->dst.bytes = 2;
3821        ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3822        return X86EMUL_CONTINUE;
3823}
3824
3825static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3826{
3827        ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3828                          | (ctxt->src.val & 0x0f));
3829        ctxt->dst.type = OP_NONE;
3830        return X86EMUL_CONTINUE;
3831}
3832
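/*
 * LOOP/LOOPE/LOOPNE (0xe2/0xe1/0xe0): decrement rCX and branch while it is
 * non-zero; for LOOPE/LOOPNE the ZF condition derived from the opcode (via
 * test_cc) must also hold.
 */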
3833static int em_loop(struct x86_emulate_ctxt *ctxt)
3834{
3835        int rc = X86EMUL_CONTINUE;
3836
3837        register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3838        if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3839            (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3840                rc = jmp_rel(ctxt, ctxt->src.val);
3841
3842        return rc;
3843}
3844
3845static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3846{
3847        int rc = X86EMUL_CONTINUE;
3848
3849        if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3850                rc = jmp_rel(ctxt, ctxt->src.val);
3851
3852        return rc;
3853}
3854
3855static int em_in(struct x86_emulate_ctxt *ctxt)
3856{
3857        if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3858                             &ctxt->dst.val))
3859                return X86EMUL_IO_NEEDED;
3860
3861        return X86EMUL_CONTINUE;
3862}
3863
3864static int em_out(struct x86_emulate_ctxt *ctxt)
3865{
3866        ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3867                                    &ctxt->src.val, 1);
3868        /* Disable writeback. */
3869        ctxt->dst.type = OP_NONE;
3870        return X86EMUL_CONTINUE;
3871}
3872
3873static int em_cli(struct x86_emulate_ctxt *ctxt)
3874{
3875        if (emulator_bad_iopl(ctxt))
3876                return emulate_gp(ctxt, 0);
3877
3878        ctxt->eflags &= ~X86_EFLAGS_IF;
3879        return X86EMUL_CONTINUE;
3880}
3881
3882static int em_sti(struct x86_emulate_ctxt *ctxt)
3883{
3884        if (emulator_bad_iopl(ctxt))
3885                return emulate_gp(ctxt, 0);
3886
3887        ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3888        ctxt->eflags |= X86_EFLAGS_IF;
3889        return X86EMUL_CONTINUE;
3890}
3891
3892static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3893{
3894        u32 eax, ebx, ecx, edx;
3895        u64 msr = 0;
3896
3897        ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3898        if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3899            ctxt->ops->cpl(ctxt)) {
3900                return emulate_gp(ctxt, 0);
3901        }
3902
3903        eax = reg_read(ctxt, VCPU_REGS_RAX);
3904        ecx = reg_read(ctxt, VCPU_REGS_RCX);
3905        ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3906        *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3907        *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3908        *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3909        *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3910        return X86EMUL_CONTINUE;
3911}
3912
3913static int em_sahf(struct x86_emulate_ctxt *ctxt)
3914{
3915        u32 flags;
3916
3917        flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3918                X86_EFLAGS_SF;
3919        flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3920
3921        ctxt->eflags &= ~0xffUL;
3922        ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3923        return X86EMUL_CONTINUE;
3924}
3925
3926static int em_lahf(struct x86_emulate_ctxt *ctxt)
3927{
3928        *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3929        *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3930        return X86EMUL_CONTINUE;
3931}
3932
3933static int em_bswap(struct x86_emulate_ctxt *ctxt)
3934{
3935        switch (ctxt->op_bytes) {
3936#ifdef CONFIG_X86_64
3937        case 8:
3938                asm("bswap %0" : "+r"(ctxt->dst.val));
3939                break;
3940#endif
3941        default:
3942                asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3943                break;
3944        }
3945        return X86EMUL_CONTINUE;
3946}
3947
3948static int em_clflush(struct x86_emulate_ctxt *ctxt)
3949{
3950        /* emulating clflush regardless of cpuid */
3951        return X86EMUL_CONTINUE;
3952}
3953
3954static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3955{
3956        /* emulating clflushopt regardless of cpuid */
3957        return X86EMUL_CONTINUE;
3958}
3959
3960static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3961{
3962        ctxt->dst.val = (s32) ctxt->src.val;
3963        return X86EMUL_CONTINUE;
3964}
3965
3966static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3967{
3968        if (!ctxt->ops->guest_has_fxsr(ctxt))
3969                return emulate_ud(ctxt);
3970
3971        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3972                return emulate_nm(ctxt);
3973
3974        /*
3975         * Rather than work around a lack of fxsave64/fxrstor64 on old
3976         * compilers, don't emulate a case that should never be hit anyway.
3977         */
3978        if (ctxt->mode >= X86EMUL_MODE_PROT64)
3979                return X86EMUL_UNHANDLEABLE;
3980
3981        return X86EMUL_CONTINUE;
3982}
3983
3984/*
3985 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3986 * and restore MXCSR.
3987 */
3988static size_t __fxstate_size(int nregs)
3989{
3990        return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3991}
3992
3993static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3994{
3995        bool cr4_osfxsr;
3996        if (ctxt->mode == X86EMUL_MODE_PROT64)
3997                return __fxstate_size(16);
3998
3999        cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
4000        return __fxstate_size(cr4_osfxsr ? 8 : 0);
4001}
4002
4003/*
4004 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
4005 *  1) 16 bit mode
4006 *  2) 32 bit mode
4007 *     - like (1), but FIP and FDP (the FPU instruction and data pointers) are
4008 *       only 16 bit.  At least Intel CPUs preserve whole 32-bit values, though,
4009 *       so (1) and (2) are the same wrt. save and restore
4010 *  3) 64-bit mode with REX.W prefix
4011 *     - like (2), but XMM 8-15 are being saved and restored
4012 *  4) 64-bit mode without REX.W prefix
4013 *     - like (3), but FIP and FDP are 64 bit
4014 *
4015 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
4016 * desired result.  (4) is not emulated.
4017 *
4018 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
4019 * and FPU DS) should match.
4020 */
4021static int em_fxsave(struct x86_emulate_ctxt *ctxt)
4022{
4023        struct fxregs_state fx_state;
4024        int rc;
4025
4026        rc = check_fxsr(ctxt);
4027        if (rc != X86EMUL_CONTINUE)
4028                return rc;
4029
4030        kvm_fpu_get();
4031
4032        rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
4033
4034        kvm_fpu_put();
4035
4036        if (rc != X86EMUL_CONTINUE)
4037                return rc;
4038
4039        return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
4040                                   fxstate_size(ctxt));
4041}
4042
4043/*
4044 * FXRSTOR might restore XMM registers not provided by the guest. Fill
4045 * in the host registers (via FXSAVE) instead, so they won't be modified.
4046 * (preemption has to stay disabled until FXRSTOR).
4047 *
4048 * Use noinline to keep the stack for other functions called by callers small.
4049 */
4050static noinline int fxregs_fixup(struct fxregs_state *fx_state,
4051                                 const size_t used_size)
4052{
4053        struct fxregs_state fx_tmp;
4054        int rc;
4055
4056        rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
4057        memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
4058               __fxstate_size(16) - used_size);
4059
4060        return rc;
4061}
4062
4063static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
4064{
4065        struct fxregs_state fx_state;
4066        int rc;
4067        size_t size;
4068
4069        rc = check_fxsr(ctxt);
4070        if (rc != X86EMUL_CONTINUE)
4071                return rc;
4072
4073        size = fxstate_size(ctxt);
4074        rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
4075        if (rc != X86EMUL_CONTINUE)
4076                return rc;
4077
4078        kvm_fpu_get();
4079
4080        if (size < __fxstate_size(16)) {
4081                rc = fxregs_fixup(&fx_state, size);
4082                if (rc != X86EMUL_CONTINUE)
4083                        goto out;
4084        }
4085
4086        if (fx_state.mxcsr >> 16) {
4087                rc = emulate_gp(ctxt, 0);
4088                goto out;
4089        }
4090
4091        if (rc == X86EMUL_CONTINUE)
4092                rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
4093
4094out:
4095        kvm_fpu_put();
4096
4097        return rc;
4098}
4099
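/*
 * XSETBV: load the extended control register indexed by ECX with EDX:EAX;
 * an invalid XCR index or value raises #GP(0).
 */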
4100static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
4101{
4102        u32 eax, ecx, edx;
4103
4104        eax = reg_read(ctxt, VCPU_REGS_RAX);
4105        edx = reg_read(ctxt, VCPU_REGS_RDX);
4106        ecx = reg_read(ctxt, VCPU_REGS_RCX);
4107
4108        if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
4109                return emulate_gp(ctxt, 0);
4110
4111        return X86EMUL_CONTINUE;
4112}
4113
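/*
 * Only CR0, CR2-CR4 and CR8 exist architecturally; MOV to/from any other
 * control register raises #UD.
 */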
4114static bool valid_cr(int nr)
4115{
4116        switch (nr) {
4117        case 0:
4118        case 2 ... 4:
4119        case 8:
4120                return true;
4121        default:
4122                return false;
4123        }
4124}
4125
4126static int check_cr_access(struct x86_emulate_ctxt *ctxt)
4127{
4128        if (!valid_cr(ctxt->modrm_reg))
4129                return emulate_ud(ctxt);
4130
4131        return X86EMUL_CONTINUE;
4132}
4133
4134static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4135{
4136        unsigned long dr7;
4137
4138        ctxt->ops->get_dr(ctxt, 7, &dr7);
4139
4140        /* Check if DR7.GD (general detect enable, bit 13) is set */
4141        return dr7 & (1 << 13);
4142}
4143
4144static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4145{
4146        int dr = ctxt->modrm_reg;
4147        u64 cr4;
4148
4149        if (dr > 7)
4150                return emulate_ud(ctxt);
4151
4152        cr4 = ctxt->ops->get_cr(ctxt, 4);
4153        if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4154                return emulate_ud(ctxt);
4155
4156        if (check_dr7_gd(ctxt)) {
4157                ulong dr6;
4158
4159                ctxt->ops->get_dr(ctxt, 6, &dr6);
4160                dr6 &= ~DR_TRAP_BITS;
4161                dr6 |= DR6_BD | DR6_ACTIVE_LOW;
4162                ctxt->ops->set_dr(ctxt, 6, dr6);
4163                return emulate_db(ctxt);
4164        }
4165
4166        return X86EMUL_CONTINUE;
4167}
4168
4169static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4170{
4171        u64 new_val = ctxt->src.val64;
4172        int dr = ctxt->modrm_reg;
4173
4174        if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4175                return emulate_gp(ctxt, 0);
4176
4177        return check_dr_read(ctxt);
4178}
4179
4180static int check_svme(struct x86_emulate_ctxt *ctxt)
4181{
4182        u64 efer = 0;
4183
4184        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4185
4186        if (!(efer & EFER_SVME))
4187                return emulate_ud(ctxt);
4188
4189        return X86EMUL_CONTINUE;
4190}
4191
4192static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4193{
4194        u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4195
4196        /* Valid physical address? */
4197        if (rax & 0xffff000000000000ULL)
4198                return emulate_gp(ctxt, 0);
4199
4200        return check_svme(ctxt);
4201}
4202
4203static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4204{
4205        u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4206
4207        if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4208                return emulate_gp(ctxt, 0);
4209
4210        return X86EMUL_CONTINUE;
4211}
4212
4213static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4214{
4215        u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4216        u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4217
4218        /*
4219         * VMware allows access to these pseudo-PMCs even when read via RDPMC
4220         * in Ring3 when CR4.PCE=0.
4221         */
4222        if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
4223                return X86EMUL_CONTINUE;
4224
4225        if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4226            ctxt->ops->check_pmc(ctxt, rcx))
4227                return emulate_gp(ctxt, 0);
4228
4229        return X86EMUL_CONTINUE;
4230}
4231
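/*
 * IN/OUT permission checks: the access size is clamped to 4 bytes and the
 * port must pass the emulator's I/O permission check (emulator_io_permited);
 * otherwise #GP(0) is raised.
 */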
4232static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4233{
4234        ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4235        if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4236                return emulate_gp(ctxt, 0);
4237
4238        return X86EMUL_CONTINUE;
4239}
4240
4241static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4242{
4243        ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4244        if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4245                return emulate_gp(ctxt, 0);
4246
4247        return X86EMUL_CONTINUE;
4248}
4249
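/*
 * Shorthand for building the decode tables below: D() is a decode-only
 * entry, I() attaches an emulation callback, F() a fastop, DI()/II()/IIP()
 * additionally name an intercept (and permission check), and G()/GD()/E()/
 * GP()/MD()/ID() redirect through group, group-dual, escape, prefix,
 * mode-dual and instruction-dual sub-tables keyed off ModRM or a mandatory
 * prefix.  For example, under these definitions
 * I(SrcImm | Mov | Stack, em_push) simply expands to
 * { .flags = SrcImm | Mov | Stack, .u.execute = em_push }.
 */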
4250#define D(_y) { .flags = (_y) }
4251#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4252#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4253                      .intercept = x86_intercept_##_i, .check_perm = (_p) }
4254#define N    D(NotImpl)
4255#define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4256#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4257#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4258#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4259#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4260#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4261#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4262#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4263#define II(_f, _e, _i) \
4264        { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4265#define IIP(_f, _e, _i, _p) \
4266        { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4267          .intercept = x86_intercept_##_i, .check_perm = (_p) }
4268#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4269
4270#define D2bv(_f)      D((_f) | ByteOp), D(_f)
4271#define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4272#define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
4273#define F2bv(_f, _e)  F((_f) | ByteOp, _e), F(_f, _e)
4274#define I2bvIP(_f, _e, _i, _p) \
4275        IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4276
4277#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e),         \
4278                F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),     \
4279                F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
4280
4281static const struct opcode group7_rm0[] = {
4282        N,
4283        I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4284        N, N, N, N, N, N,
4285};
4286
4287static const struct opcode group7_rm1[] = {
4288        DI(SrcNone | Priv, monitor),
4289        DI(SrcNone | Priv, mwait),
4290        N, N, N, N, N, N,
4291};
4292
4293static const struct opcode group7_rm2[] = {
4294        N,
4295        II(ImplicitOps | Priv,                  em_xsetbv,      xsetbv),
4296        N, N, N, N, N, N,
4297};
4298
4299static const struct opcode group7_rm3[] = {
4300        DIP(SrcNone | Prot | Priv,              vmrun,          check_svme_pa),
4301        II(SrcNone  | Prot | EmulateOnUD,       em_hypercall,   vmmcall),
4302        DIP(SrcNone | Prot | Priv,              vmload,         check_svme_pa),
4303        DIP(SrcNone | Prot | Priv,              vmsave,         check_svme_pa),
4304        DIP(SrcNone | Prot | Priv,              stgi,           check_svme),
4305        DIP(SrcNone | Prot | Priv,              clgi,           check_svme),
4306        DIP(SrcNone | Prot | Priv,              skinit,         check_svme),
4307        DIP(SrcNone | Prot | Priv,              invlpga,        check_svme),
4308};
4309
4310static const struct opcode group7_rm7[] = {
4311        N,
4312        DIP(SrcNone, rdtscp, check_rdtsc),
4313        N, N, N, N, N, N,
4314};
4315
4316static const struct opcode group1[] = {
4317        F(Lock, em_add),
4318        F(Lock | PageTable, em_or),
4319        F(Lock, em_adc),
4320        F(Lock, em_sbb),
4321        F(Lock | PageTable, em_and),
4322        F(Lock, em_sub),
4323        F(Lock, em_xor),
4324        F(NoWrite, em_cmp),
4325};
4326
4327static const struct opcode group1A[] = {
4328        I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4329};
4330
4331static const struct opcode group2[] = {
4332        F(DstMem | ModRM, em_rol),
4333        F(DstMem | ModRM, em_ror),
4334        F(DstMem | ModRM, em_rcl),
4335        F(DstMem | ModRM, em_rcr),
4336        F(DstMem | ModRM, em_shl),
4337        F(DstMem | ModRM, em_shr),
4338        F(DstMem | ModRM, em_shl),
4339        F(DstMem | ModRM, em_sar),
4340};
4341
4342static const struct opcode group3[] = {
4343        F(DstMem | SrcImm | NoWrite, em_test),
4344        F(DstMem | SrcImm | NoWrite, em_test),
4345        F(DstMem | SrcNone | Lock, em_not),
4346        F(DstMem | SrcNone | Lock, em_neg),
4347        F(DstXacc | Src2Mem, em_mul_ex),
4348        F(DstXacc | Src2Mem, em_imul_ex),
4349        F(DstXacc | Src2Mem, em_div_ex),
4350        F(DstXacc | Src2Mem, em_idiv_ex),
4351};
4352
4353static const struct opcode group4[] = {
4354        F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4355        F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4356        N, N, N, N, N, N,
4357};
4358
4359static const struct opcode group5[] = {
4360        F(DstMem | SrcNone | Lock,              em_inc),
4361        F(DstMem | SrcNone | Lock,              em_dec),
4362        I(SrcMem | NearBranch,                  em_call_near_abs),
4363        I(SrcMemFAddr | ImplicitOps,            em_call_far),
4364        I(SrcMem | NearBranch,                  em_jmp_abs),
4365        I(SrcMemFAddr | ImplicitOps,            em_jmp_far),
4366        I(SrcMem | Stack | TwoMemOp,            em_push), D(Undefined),
4367};
4368
4369static const struct opcode group6[] = {
4370        II(Prot | DstMem,          em_sldt, sldt),
4371        II(Prot | DstMem,          em_str, str),
4372        II(Prot | Priv | SrcMem16, em_lldt, lldt),
4373        II(Prot | Priv | SrcMem16, em_ltr, ltr),
4374        N, N, N, N,
4375};
4376
4377static const struct group_dual group7 = { {
4378        II(Mov | DstMem,                        em_sgdt, sgdt),
4379        II(Mov | DstMem,                        em_sidt, sidt),
4380        II(SrcMem | Priv,                       em_lgdt, lgdt),
4381        II(SrcMem | Priv,                       em_lidt, lidt),
4382        II(SrcNone | DstMem | Mov,              em_smsw, smsw), N,
4383        II(SrcMem16 | Mov | Priv,               em_lmsw, lmsw),
4384        II(SrcMem | ByteOp | Priv | NoAccess,   em_invlpg, invlpg),
4385}, {
4386        EXT(0, group7_rm0),
4387        EXT(0, group7_rm1),
4388        EXT(0, group7_rm2),
4389        EXT(0, group7_rm3),
4390        II(SrcNone | DstMem | Mov,              em_smsw, smsw), N,
4391        II(SrcMem16 | Mov | Priv,               em_lmsw, lmsw),
4392        EXT(0, group7_rm7),
4393} };
4394
4395static const struct opcode group8[] = {
4396        N, N, N, N,
4397        F(DstMem | SrcImmByte | NoWrite,                em_bt),
4398        F(DstMem | SrcImmByte | Lock | PageTable,       em_bts),
4399        F(DstMem | SrcImmByte | Lock,                   em_btr),
4400        F(DstMem | SrcImmByte | Lock | PageTable,       em_btc),
4401};
4402
4403/*
4404 * The "memory" destination is actually always a register, since we come
4405 * from the register case of group9.
4406 */
4407static const struct gprefix pfx_0f_c7_7 = {
4408        N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4409};
4410
4411
4412static const struct group_dual group9 = { {
4413        N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4414}, {
4415        N, N, N, N, N, N, N,
4416        GP(0, &pfx_0f_c7_7),
4417} };
4418
4419static const struct opcode group11[] = {
4420        I(DstMem | SrcImm | Mov | PageTable, em_mov),
4421        X7(D(Undefined)),
4422};
4423
4424static const struct gprefix pfx_0f_ae_7 = {
4425        I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4426};
4427
4428static const struct group_dual group15 = { {
4429        I(ModRM | Aligned16, em_fxsave),
4430        I(ModRM | Aligned16, em_fxrstor),
4431        N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4432}, {
4433        N, N, N, N, N, N, N, N,
4434} };
4435
4436static const struct gprefix pfx_0f_6f_0f_7f = {
4437        I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4438};
4439
4440static const struct instr_dual instr_dual_0f_2b = {
4441        I(0, em_mov), N
4442};
4443
4444static const struct gprefix pfx_0f_2b = {
4445        ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4446};
4447
4448static const struct gprefix pfx_0f_10_0f_11 = {
4449        I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4450};
4451
4452static const struct gprefix pfx_0f_28_0f_29 = {
4453        I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4454};
4455
4456static const struct gprefix pfx_0f_e7 = {
4457        N, I(Sse, em_mov), N, N,
4458};
4459
4460static const struct escape escape_d9 = { {
4461        N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4462}, {
4463        /* 0xC0 - 0xC7 */
4464        N, N, N, N, N, N, N, N,
4465        /* 0xC8 - 0xCF */
4466        N, N, N, N, N, N, N, N,
4467        /* 0xD0 - 0xD7 */
4468        N, N, N, N, N, N, N, N,
4469        /* 0xD8 - 0xDF */
4470        N, N, N, N, N, N, N, N,
4471        /* 0xE0 - 0xE7 */
4472        N, N, N, N, N, N, N, N,
4473        /* 0xE8 - 0xEF */
4474        N, N, N, N, N, N, N, N,
4475        /* 0xF0 - 0xF7 */
4476        N, N, N, N, N, N, N, N,
4477        /* 0xF8 - 0xFF */
4478        N, N, N, N, N, N, N, N,
4479} };
4480
4481static const struct escape escape_db = { {
4482        N, N, N, N, N, N, N, N,
4483}, {
4484        /* 0xC0 - 0xC7 */
4485        N, N, N, N, N, N, N, N,
4486        /* 0xC8 - 0xCF */
4487        N, N, N, N, N, N, N, N,
4488        /* 0xD0 - 0xD7 */
4489        N, N, N, N, N, N, N, N,
4490        /* 0xD8 - 0xDF */
4491        N, N, N, N, N, N, N, N,
4492        /* 0xE0 - 0xE7 */
4493        N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4494        /* 0xE8 - 0xEF */
4495        N, N, N, N, N, N, N, N,
4496        /* 0xF0 - 0xF7 */
4497        N, N, N, N, N, N, N, N,
4498        /* 0xF8 - 0xFF */
4499        N, N, N, N, N, N, N, N,
4500} };
4501
4502static const struct escape escape_dd = { {
4503        N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4504}, {
4505        /* 0xC0 - 0xC7 */
4506        N, N, N, N, N, N, N, N,
4507        /* 0xC8 - 0xCF */
4508        N, N, N, N, N, N, N, N,
4509        /* 0xD0 - 0xD7 */
4510        N, N, N, N, N, N, N, N,
4511        /* 0xD8 - 0xDF */
4512        N, N, N, N, N, N, N, N,
4513        /* 0xE0 - 0xE7 */
4514        N, N, N, N, N, N, N, N,
4515        /* 0xE8 - 0xEF */
4516        N, N, N, N, N, N, N, N,
4517        /* 0xF0 - 0xF7 */
4518        N, N, N, N, N, N, N, N,
4519        /* 0xF8 - 0xFF */
4520        N, N, N, N, N, N, N, N,
4521} };
4522
4523static const struct instr_dual instr_dual_0f_c3 = {
4524        I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4525};
4526
4527static const struct mode_dual mode_dual_63 = {
4528        N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4529};
4530
4531static const struct opcode opcode_table[256] = {
4532        /* 0x00 - 0x07 */
4533        F6ALU(Lock, em_add),
4534        I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4535        I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4536        /* 0x08 - 0x0F */
4537        F6ALU(Lock | PageTable, em_or),
4538        I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4539        N,
4540        /* 0x10 - 0x17 */
4541        F6ALU(Lock, em_adc),
4542        I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4543        I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4544        /* 0x18 - 0x1F */
4545        F6ALU(Lock, em_sbb),
4546        I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4547        I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4548        /* 0x20 - 0x27 */
4549        F6ALU(Lock | PageTable, em_and), N, N,
4550        /* 0x28 - 0x2F */
4551        F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4552        /* 0x30 - 0x37 */
4553        F6ALU(Lock, em_xor), N, N,
4554        /* 0x38 - 0x3F */
4555        F6ALU(NoWrite, em_cmp), N, N,
4556        /* 0x40 - 0x4F */
4557        X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4558        /* 0x50 - 0x57 */
4559        X8(I(SrcReg | Stack, em_push)),
4560        /* 0x58 - 0x5F */
4561        X8(I(DstReg | Stack, em_pop)),
4562        /* 0x60 - 0x67 */
4563        I(ImplicitOps | Stack | No64, em_pusha),
4564        I(ImplicitOps | Stack | No64, em_popa),
4565        N, MD(ModRM, &mode_dual_63),
4566        N, N, N, N,
4567        /* 0x68 - 0x6F */
4568        I(SrcImm | Mov | Stack, em_push),
4569        I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4570        I(SrcImmByte | Mov | Stack, em_push),
4571        I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4572        I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4573        I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4574        /* 0x70 - 0x7F */
4575        X16(D(SrcImmByte | NearBranch)),
4576        /* 0x80 - 0x87 */
4577        G(ByteOp | DstMem | SrcImm, group1),
4578        G(DstMem | SrcImm, group1),
4579        G(ByteOp | DstMem | SrcImm | No64, group1),
4580        G(DstMem | SrcImmByte, group1),
4581        F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4582        I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4583        /* 0x88 - 0x8F */
4584        I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4585        I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4586        I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4587        D(ModRM | SrcMem | NoAccess | DstReg),
4588        I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4589        G(0, group1A),
4590        /* 0x90 - 0x97 */
4591        DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4592        /* 0x98 - 0x9F */
4593        D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4594        I(SrcImmFAddr | No64, em_call_far), N,
4595        II(ImplicitOps | Stack, em_pushf, pushf),
4596        II(ImplicitOps | Stack, em_popf, popf),
4597        I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4598        /* 0xA0 - 0xA7 */
4599        I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4600        I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4601        I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4602        F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4603        /* 0xA8 - 0xAF */
4604        F2bv(DstAcc | SrcImm | NoWrite, em_test),
4605        I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4606        I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4607        F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4608        /* 0xB0 - 0xB7 */
4609        X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4610        /* 0xB8 - 0xBF */
4611        X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4612        /* 0xC0 - 0xC7 */
4613        G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4614        I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4615        I(ImplicitOps | NearBranch, em_ret),
4616        I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4617        I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4618        G(ByteOp, group11), G(0, group11),
4619        /* 0xC8 - 0xCF */
4620        I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4621        I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4622        I(ImplicitOps, em_ret_far),
4623        D(ImplicitOps), DI(SrcImmByte, intn),
4624        D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4625        /* 0xD0 - 0xD7 */
4626        G(Src2One | ByteOp, group2), G(Src2One, group2),
4627        G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4628        I(DstAcc | SrcImmUByte | No64, em_aam),
4629        I(DstAcc | SrcImmUByte | No64, em_aad),
4630        F(DstAcc | ByteOp | No64, em_salc),
4631        I(DstAcc | SrcXLat | ByteOp, em_mov),
4632        /* 0xD8 - 0xDF */
4633        N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4634        /* 0xE0 - 0xE7 */
4635        X3(I(SrcImmByte | NearBranch, em_loop)),
4636        I(SrcImmByte | NearBranch, em_jcxz),
4637        I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
4638        I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4639        /* 0xE8 - 0xEF */
4640        I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4641        I(SrcImmFAddr | No64, em_jmp_far),
4642        D(SrcImmByte | ImplicitOps | NearBranch),
4643        I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
4644        I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4645        /* 0xF0 - 0xF7 */
4646        N, DI(ImplicitOps, icebp), N, N,
4647        DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4648        G(ByteOp, group3), G(0, group3),
4649        /* 0xF8 - 0xFF */
4650        D(ImplicitOps), D(ImplicitOps),
4651        I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4652        D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4653};
4654
4655static const struct opcode twobyte_table[256] = {
4656        /* 0x00 - 0x0F */
4657        G(0, group6), GD(0, &group7), N, N,
4658        N, I(ImplicitOps | EmulateOnUD, em_syscall),
4659        II(ImplicitOps | Priv, em_clts, clts), N,
4660        DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4661        N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4662        /* 0x10 - 0x1F */
4663        GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4664        GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4665        N, N, N, N, N, N,
4666        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4667        D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4668        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4669        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4670        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4671        D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4672        /* 0x20 - 0x2F */
4673        DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4674        DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4675        IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4676                                                check_cr_access),
4677        IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4678                                                check_dr_write),
4679        N, N, N, N,
4680        GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4681        GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4682        N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4683        N, N, N, N,
4684        /* 0x30 - 0x3F */
4685        II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4686        IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4687        II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4688        IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4689        I(ImplicitOps | EmulateOnUD, em_sysenter),
4690        I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4691        N, N,
4692        N, N, N, N, N, N, N, N,
4693        /* 0x40 - 0x4F */
4694        X16(D(DstReg | SrcMem | ModRM)),
4695        /* 0x50 - 0x5F */
4696        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4697        /* 0x60 - 0x6F */
4698        N, N, N, N,
4699        N, N, N, N,
4700        N, N, N, N,
4701        N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4702        /* 0x70 - 0x7F */
4703        N, N, N, N,
4704        N, N, N, N,
4705        N, N, N, N,
4706        N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4707        /* 0x80 - 0x8F */
4708        X16(D(SrcImm | NearBranch)),
4709        /* 0x90 - 0x9F */
4710        X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
4711        /* 0xA0 - 0xA7 */
4712        I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4713        II(ImplicitOps, em_cpuid, cpuid),
4714        F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4715        F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4716        F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4717        /* 0xA8 - 0xAF */
4718        I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4719        II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4720        F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4721        F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4722        F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4723        GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4724        /* 0xB0 - 0xB7 */
4725        I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4726        I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4727        F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4728        I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4729        I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4730        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4731        /* 0xB8 - 0xBF */
4732        N, N,
4733        G(BitOp, group8),
4734        F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4735        I(DstReg | SrcMem | ModRM, em_bsf_c),
4736        I(DstReg | SrcMem | ModRM, em_bsr_c),
4737        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4738        /* 0xC0 - 0xC7 */
4739        F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4740        N, ID(0, &instr_dual_0f_c3),
4741        N, N, N, GD(0, &group9),
4742        /* 0xC8 - 0xCF */
4743        X8(I(DstReg, em_bswap)),
4744        /* 0xD0 - 0xDF */
4745        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4746        /* 0xE0 - 0xEF */
4747        N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4748        N, N, N, N, N, N, N, N,
4749        /* 0xF0 - 0xFF */
4750        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4751};
4752
4753static const struct instr_dual instr_dual_0f_38_f0 = {
4754        I(DstReg | SrcMem | Mov, em_movbe), N
4755};
4756
4757static const struct instr_dual instr_dual_0f_38_f1 = {
4758        I(DstMem | SrcReg | Mov, em_movbe), N
4759};
4760
4761static const struct gprefix three_byte_0f_38_f0 = {
4762        ID(0, &instr_dual_0f_38_f0), N, N, N
4763};
4764
4765static const struct gprefix three_byte_0f_38_f1 = {
4766        ID(0, &instr_dual_0f_38_f1), N, N, N
4767};
4768
4769/*
4770 * Insns below are selected by a prefix group that is indexed by the
4771 * third opcode byte.
4772 */
4773static const struct opcode opcode_map_0f_38[256] = {
4774        /* 0x00 - 0x7f */
4775        X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4776        /* 0x80 - 0xef */
4777        X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4778        /* 0xf0 - 0xf1 */
4779        GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4780        GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4781        /* 0xf2 - 0xff */
4782        N, N, X4(N), X8(N)
4783};
4784
4785#undef D
4786#undef N
4787#undef G
4788#undef GD
4789#undef I
4790#undef GP
4791#undef EXT
4792#undef MD
4793#undef ID
4794
4795#undef D2bv
4796#undef D2bvIP
4797#undef I2bv
4798#undef I2bvIP
4799#undef I6ALU
4800
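/*
 * Size in bytes of an immediate operand.  Note that a 64-bit operand size
 * still implies a 32-bit immediate (the consumer sign-extends it); only
 * operands decoded as OpImm64 fetch a full 8-byte immediate.
 */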
4801static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4802{
4803        unsigned size;
4804
4805        size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4806        if (size == 8)
4807                size = 4;
4808        return size;
4809}
4810
4811static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4812                      unsigned size, bool sign_extension)
4813{
4814        int rc = X86EMUL_CONTINUE;
4815
4816        op->type = OP_IMM;
4817        op->bytes = size;
4818        op->addr.mem.ea = ctxt->_eip;
4819        /* NB. Immediates are sign-extended as necessary. */
4820        switch (op->bytes) {
4821        case 1:
4822                op->val = insn_fetch(s8, ctxt);
4823                break;
4824        case 2:
4825                op->val = insn_fetch(s16, ctxt);
4826                break;
4827        case 4:
4828                op->val = insn_fetch(s32, ctxt);
4829                break;
4830        case 8:
4831                op->val = insn_fetch(s64, ctxt);
4832                break;
4833        }
4834        if (!sign_extension) {
4835                switch (op->bytes) {
4836                case 1:
4837                        op->val &= 0xff;
4838                        break;
4839                case 2:
4840                        op->val &= 0xffff;
4841                        break;
4842                case 4:
4843                        op->val &= 0xffffffff;
4844                        break;
4845                }
4846        }
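        /* insn_fetch() jumps here if fetching the immediate bytes fails. */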
4847done:
4848        return rc;
4849}
4850
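/*
 * Decode a single operand according to the OpXxx selector extracted from
 * ctxt->d (the Src, Src2 or Dst field).  Memory-like operands all funnel
 * into mem_common so that ctxt->memop is shared and ctxt->memopp records
 * which operand ended up holding it.
 */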
4851static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4852                          unsigned d)
4853{
4854        int rc = X86EMUL_CONTINUE;
4855
4856        switch (d) {
4857        case OpReg:
4858                decode_register_operand(ctxt, op);
4859                break;
4860        case OpImmUByte:
4861                rc = decode_imm(ctxt, op, 1, false);
4862                break;
4863        case OpMem:
4864                ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4865        mem_common:
4866                *op = ctxt->memop;
4867                ctxt->memopp = op;
4868                if (ctxt->d & BitOp)
4869                        fetch_bit_operand(ctxt);
4870                op->orig_val = op->val;
4871                break;
4872        case OpMem64:
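                /* 16 bytes with REX.W (cmpxchg16b), otherwise 8 (cmpxchg8b). */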
4873                ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4874                goto mem_common;
4875        case OpAcc:
4876                op->type = OP_REG;
4877                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4878                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4879                fetch_register_operand(op);
4880                op->orig_val = op->val;
4881                break;
4882        case OpAccLo:
4883                op->type = OP_REG;
4884                op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4885                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4886                fetch_register_operand(op);
4887                op->orig_val = op->val;
4888                break;
4889        case OpAccHi:
4890                if (ctxt->d & ByteOp) {
4891                        op->type = OP_NONE;
4892                        break;
4893                }
4894                op->type = OP_REG;
4895                op->bytes = ctxt->op_bytes;
4896                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4897                fetch_register_operand(op);
4898                op->orig_val = op->val;
4899                break;
4900        case OpDI:
4901                op->type = OP_MEM;
4902                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4903                op->addr.mem.ea =
4904                        register_address(ctxt, VCPU_REGS_RDI);
4905                op->addr.mem.seg = VCPU_SREG_ES;
4906                op->val = 0;
4907                op->count = 1;
4908                break;
4909        case OpDX:
4910                op->type = OP_REG;
4911                op->bytes = 2;
4912                op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4913                fetch_register_operand(op);
4914                break;
4915        case OpCL:
4916                op->type = OP_IMM;
4917                op->bytes = 1;
4918                op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4919                break;
4920        case OpImmByte:
4921                rc = decode_imm(ctxt, op, 1, true);
4922                break;
4923        case OpOne:
4924                op->type = OP_IMM;
4925                op->bytes = 1;
4926                op->val = 1;
4927                break;
4928        case OpImm:
4929                rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4930                break;
4931        case OpImm64:
4932                rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4933                break;
4934        case OpMem8:
4935                ctxt->memop.bytes = 1;
4936                if (ctxt->memop.type == OP_REG) {
4937                        ctxt->memop.addr.reg = decode_register(ctxt,
4938                                        ctxt->modrm_rm, true);
4939                        fetch_register_operand(&ctxt->memop);
4940                }
4941                goto mem_common;
4942        case OpMem16:
4943                ctxt->memop.bytes = 2;
4944                goto mem_common;
4945        case OpMem32:
4946                ctxt->memop.bytes = 4;
4947                goto mem_common;
4948        case OpImmU16:
4949                rc = decode_imm(ctxt, op, 2, false);
4950                break;
4951        case OpImmU:
4952                rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4953                break;
4954        case OpSI:
4955                op->type = OP_MEM;
4956                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4957                op->addr.mem.ea =
4958                        register_address(ctxt, VCPU_REGS_RSI);
4959                op->addr.mem.seg = ctxt->seg_override;
4960                op->val = 0;
4961                op->count = 1;
4962                break;
4963        case OpXLat:
4964                op->type = OP_MEM;
4965                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4966                op->addr.mem.ea =
4967                        address_mask(ctxt,
4968                                reg_read(ctxt, VCPU_REGS_RBX) +
4969                                (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4970                op->addr.mem.seg = ctxt->seg_override;
4971                op->val = 0;
4972                break;
4973        case OpImmFAddr:
4974                op->type = OP_IMM;
4975                op->addr.mem.ea = ctxt->_eip;
4976                op->bytes = ctxt->op_bytes + 2;
4977                insn_fetch_arr(op->valptr, op->bytes, ctxt);
4978                break;
4979        case OpMemFAddr:
4980                ctxt->memop.bytes = ctxt->op_bytes + 2;
4981                goto mem_common;
4982        case OpES:
4983                op->type = OP_IMM;
4984                op->val = VCPU_SREG_ES;
4985                break;
4986        case OpCS:
4987                op->type = OP_IMM;
4988                op->val = VCPU_SREG_CS;
4989                break;
4990        case OpSS:
4991                op->type = OP_IMM;
4992                op->val = VCPU_SREG_SS;
4993                break;
4994        case OpDS:
4995                op->type = OP_IMM;
4996                op->val = VCPU_SREG_DS;
4997                break;
4998        case OpFS:
4999                op->type = OP_IMM;
5000                op->val = VCPU_SREG_FS;
5001                break;
5002        case OpGS:
5003                op->type = OP_IMM;
5004                op->val = VCPU_SREG_GS;
5005                break;
5006        case OpImplicit:
5007                /* Special instructions do their own operand decoding. */
5008        default:
5009                op->type = OP_NONE; /* Disable writeback. */
5010                break;
5011        }
5012
5013done:
5014        return rc;
5015}
5016
5017int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
5018{
5019        int rc = X86EMUL_CONTINUE;
5020        int mode = ctxt->mode;
5021        int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
5022        bool op_prefix = false;
5023        bool has_seg_override = false;
5024        struct opcode opcode;
5025        u16 dummy;
5026        struct desc_struct desc;
5027
5028        ctxt->memop.type = OP_NONE;
5029        ctxt->memopp = NULL;
5030        ctxt->_eip = ctxt->eip;
5031        ctxt->fetch.ptr = ctxt->fetch.data;
5032        ctxt->fetch.end = ctxt->fetch.data + insn_len;
5033        ctxt->opcode_len = 1;
5034        ctxt->intercept = x86_intercept_none;
5035        if (insn_len > 0)
5036                memcpy(ctxt->fetch.data, insn, insn_len);
5037        else {
5038                rc = __do_insn_fetch_bytes(ctxt, 1);
5039                if (rc != X86EMUL_CONTINUE)
5040                        goto done;
5041        }
5042
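        /*
         * Default operand and address sizes are determined by the CPU mode;
         * in real and VM86 mode they additionally depend on CS.D.
         */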
5043        switch (mode) {
5044        case X86EMUL_MODE_REAL:
5045        case X86EMUL_MODE_VM86:
5046                def_op_bytes = def_ad_bytes = 2;
5047                ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
5048                if (desc.d)
5049                        def_op_bytes = def_ad_bytes = 4;
5050                break;
5051        case X86EMUL_MODE_PROT16:
5052                def_op_bytes = def_ad_bytes = 2;
5053                break;
5054        case X86EMUL_MODE_PROT32:
5055                def_op_bytes = def_ad_bytes = 4;
5056                break;
5057#ifdef CONFIG_X86_64
5058        case X86EMUL_MODE_PROT64:
5059                def_op_bytes = 4;
5060                def_ad_bytes = 8;
5061                break;
5062#endif
5063        default:
5064                return EMULATION_FAILED;
5065        }
5066
5067        ctxt->op_bytes = def_op_bytes;
5068        ctxt->ad_bytes = def_ad_bytes;
5069
5070        /* Legacy prefixes. */
5071        for (;;) {
5072                switch (ctxt->b = insn_fetch(u8, ctxt)) {
5073                case 0x66:      /* operand-size override */
5074                        op_prefix = true;
5075                        /* switch between 2/4 bytes */
5076                        ctxt->op_bytes = def_op_bytes ^ 6;
5077                        break;
5078                case 0x67:      /* address-size override */
5079                        if (mode == X86EMUL_MODE_PROT64)
5080                                /* switch between 4/8 bytes */
5081                                ctxt->ad_bytes = def_ad_bytes ^ 12;
5082                        else
5083                                /* switch between 2/4 bytes */
5084                                ctxt->ad_bytes = def_ad_bytes ^ 6;
5085                        break;
5086                case 0x26:      /* ES override */
5087                        has_seg_override = true;
5088                        ctxt->seg_override = VCPU_SREG_ES;
5089                        break;
5090                case 0x2e:      /* CS override */
5091                        has_seg_override = true;
5092                        ctxt->seg_override = VCPU_SREG_CS;
5093                        break;
5094                case 0x36:      /* SS override */
5095                        has_seg_override = true;
5096                        ctxt->seg_override = VCPU_SREG_SS;
5097                        break;
5098                case 0x3e:      /* DS override */
5099                        has_seg_override = true;
5100                        ctxt->seg_override = VCPU_SREG_DS;
5101                        break;
5102                case 0x64:      /* FS override */
5103                        has_seg_override = true;
5104                        ctxt->seg_override = VCPU_SREG_FS;
5105                        break;
5106                case 0x65:      /* GS override */
5107                        has_seg_override = true;
5108                        ctxt->seg_override = VCPU_SREG_GS;
5109                        break;
5110                case 0x40 ... 0x4f: /* REX */
5111                        if (mode != X86EMUL_MODE_PROT64)
5112                                goto done_prefixes;
5113                        ctxt->rex_prefix = ctxt->b;
5114                        continue;
5115                case 0xf0:      /* LOCK */
5116                        ctxt->lock_prefix = 1;
5117                        break;
5118                case 0xf2:      /* REPNE/REPNZ */
5119                case 0xf3:      /* REP/REPE/REPZ */
5120                        ctxt->rep_prefix = ctxt->b;
5121                        break;
5122                default:
5123                        goto done_prefixes;
5124                }
5125
5126                /* Any legacy prefix after a REX prefix nullifies its effect. */
5127
5128                ctxt->rex_prefix = 0;
5129        }
5130
5131done_prefixes:
5132
5133        /* REX prefix. */
5134        if (ctxt->rex_prefix & 8)
5135                ctxt->op_bytes = 8;     /* REX.W */
5136
5137        /* Opcode byte(s). */
5138        opcode = opcode_table[ctxt->b];
5139        /* Two-byte opcode? */
5140        if (ctxt->b == 0x0f) {
5141                ctxt->opcode_len = 2;
5142                ctxt->b = insn_fetch(u8, ctxt);
5143                opcode = twobyte_table[ctxt->b];
5144
5145                /* 0F_38 opcode map */
5146                if (ctxt->b == 0x38) {
5147                        ctxt->opcode_len = 3;
5148                        ctxt->b = insn_fetch(u8, ctxt);
5149                        opcode = opcode_map_0f_38[ctxt->b];
5150                }
5151        }
5152        ctxt->d = opcode.flags;
5153
5154        if (ctxt->d & ModRM)
5155                ctxt->modrm = insn_fetch(u8, ctxt);
5156
5157        /* vex-prefix instructions are not implemented */
5158        if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5159            (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
5160                ctxt->d = NotImpl;
5161        }
5162
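        /*
         * Resolve group/extension encodings: a table entry may redirect to
         * another entry selected by ModRM bits, a mandatory SIMD prefix,
         * the x87 escape layout, or the CPU mode.  For example, opcode 0xFF
         * is G(0, group5) and bits 5:3 of ModRM pick the actual instruction.
         */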
5163        while (ctxt->d & GroupMask) {
5164                switch (ctxt->d & GroupMask) {
5165                case Group:
5166                        goffset = (ctxt->modrm >> 3) & 7;
5167                        opcode = opcode.u.group[goffset];
5168                        break;
5169                case GroupDual:
5170                        goffset = (ctxt->modrm >> 3) & 7;
5171                        if ((ctxt->modrm >> 6) == 3)
5172                                opcode = opcode.u.gdual->mod3[goffset];
5173                        else
5174                                opcode = opcode.u.gdual->mod012[goffset];
5175                        break;
5176                case RMExt:
5177                        goffset = ctxt->modrm & 7;
5178                        opcode = opcode.u.group[goffset];
5179                        break;
5180                case Prefix:
5181                        if (ctxt->rep_prefix && op_prefix)
5182                                return EMULATION_FAILED;
5183                        simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5184                        switch (simd_prefix) {
5185                        case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5186                        case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5187                        case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5188                        case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5189                        }
5190                        break;
5191                case Escape:
5192                        if (ctxt->modrm > 0xbf) {
5193                                size_t size = ARRAY_SIZE(opcode.u.esc->high);
5194                                u32 index = array_index_nospec(
5195                                        ctxt->modrm - 0xc0, size);
5196
5197                                opcode = opcode.u.esc->high[index];
5198                        } else {
5199                                opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5200                        }
5201                        break;
5202                case InstrDual:
5203                        if ((ctxt->modrm >> 6) == 3)
5204                                opcode = opcode.u.idual->mod3;
5205                        else
5206                                opcode = opcode.u.idual->mod012;
5207                        break;
5208                case ModeDual:
5209                        if (ctxt->mode == X86EMUL_MODE_PROT64)
5210                                opcode = opcode.u.mdual->mode64;
5211                        else
5212                                opcode = opcode.u.mdual->mode32;
5213                        break;
5214                default:
5215                        return EMULATION_FAILED;
5216                }
5217
5218                ctxt->d &= ~(u64)GroupMask;
5219                ctxt->d |= opcode.flags;
5220        }
5221
5222        /* Unrecognised? */
5223        if (ctxt->d == 0)
5224                return EMULATION_FAILED;
5225
5226        ctxt->execute = opcode.u.execute;
5227
5228        if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
5229            likely(!(ctxt->d & EmulateOnUD)))
5230                return EMULATION_FAILED;
5231
5232        if (unlikely(ctxt->d &
5233            (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5234             No16))) {
5235                /*
5236                 * These are copied unconditionally here, and checked unconditionally
5237                 * in x86_emulate_insn.
5238                 */
5239                ctxt->check_perm = opcode.check_perm;
5240                ctxt->intercept = opcode.intercept;
5241
5242                if (ctxt->d & NotImpl)
5243                        return EMULATION_FAILED;
5244
5245                if (mode == X86EMUL_MODE_PROT64) {
5246                        if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5247                                ctxt->op_bytes = 8;
5248                        else if (ctxt->d & NearBranch)
5249                                ctxt->op_bytes = 8;
5250                }
5251
5252                if (ctxt->d & Op3264) {
5253                        if (mode == X86EMUL_MODE_PROT64)
5254                                ctxt->op_bytes = 8;
5255                        else
5256                                ctxt->op_bytes = 4;
5257                }
5258
5259                if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5260                        ctxt->op_bytes = 4;
5261
5262                if (ctxt->d & Sse)
5263                        ctxt->op_bytes = 16;
5264                else if (ctxt->d & Mmx)
5265                        ctxt->op_bytes = 8;
5266        }
5267
5268        /* ModRM and SIB bytes. */
5269        if (ctxt->d & ModRM) {
5270                rc = decode_modrm(ctxt, &ctxt->memop);
5271                if (!has_seg_override) {
5272                        has_seg_override = true;
5273                        ctxt->seg_override = ctxt->modrm_seg;
5274                }
5275        } else if (ctxt->d & MemAbs)
5276                rc = decode_abs(ctxt, &ctxt->memop);
5277        if (rc != X86EMUL_CONTINUE)
5278                goto done;
5279
5280        if (!has_seg_override)
5281                ctxt->seg_override = VCPU_SREG_DS;
5282
5283        ctxt->memop.addr.mem.seg = ctxt->seg_override;
5284
5285        /*
5286         * Decode and fetch the source operand: register, memory
5287         * or immediate.
5288         */
5289        rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5290        if (rc != X86EMUL_CONTINUE)
5291                goto done;
5292
5293        /*
5294         * Decode and fetch the second source operand: register, memory
5295         * or immediate.
5296         */
5297        rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5298        if (rc != X86EMUL_CONTINUE)
5299                goto done;
5300
5301        /* Decode and fetch the destination operand: register or memory. */
5302        rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5303
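        /*
         * RIP-relative addressing is relative to the end of the instruction,
         * so the memory operand can only be fixed up once the whole
         * instruction (and thus ctxt->_eip) has been decoded.
         */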
5304        if (ctxt->rip_relative && likely(ctxt->memopp))
5305                ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5306                                        ctxt->memopp->addr.mem.ea + ctxt->_eip);
5307
5308done:
5309        if (rc == X86EMUL_PROPAGATE_FAULT)
5310                ctxt->have_exception = true;
5311        return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5312}
5313
5314bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5315{
5316        return ctxt->d & PageTable;
5317}
5318
5319static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5320{
5321        /* The second termination condition only applies to REPE
5322         * and REPNE. If the repeat string operation prefix is
5323         * REPE/REPZ or REPNE/REPNZ, check the corresponding
5324         * termination condition:
5325         *      - if REPE/REPZ and ZF = 0 then done
5326         *      - if REPNE/REPNZ and ZF = 1 then done
5327         */
5328        if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5329             (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5330            && (((ctxt->rep_prefix == REPE_PREFIX) &&
5331                 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5332                || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5333                    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5334                return true;
5335
5336        return false;
5337}
5338
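/*
 * Execute FWAIT with the guest FPU loaded so that any pending x87 fault is
 * raised now and forwarded to the guest as #MF, rather than being hit while
 * the emulator accesses MMX state.
 */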
5339static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5340{
5341        int rc;
5342
5343        kvm_fpu_get();
5344        rc = asm_safe("fwait");
5345        kvm_fpu_put();
5346
5347        if (unlikely(rc != X86EMUL_CONTINUE))
5348                return emulate_exception(ctxt, MF_VECTOR, 0, false);
5349
5350        return X86EMUL_CONTINUE;
5351}
5352
5353static void fetch_possible_mmx_operand(struct operand *op)
5354{
5355        if (op->type == OP_MM)
5356                kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
5357}
5358
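/*
 * Dispatch to a fastop stub: 'fop' points at the byte-sized variant, and the
 * variants for larger operand sizes are laid out FASTOP_SIZE bytes apart, so
 * log2(dst.bytes) * FASTOP_SIZE selects the right entry.  Guest EFLAGS are
 * installed around the CALL and read back afterwards.
 */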
5359static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
5360{
5361        ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5362
5363        if (!(ctxt->d & ByteOp))
5364                fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5365
5366        asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
5367            : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5368              [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
5369            : "c"(ctxt->src2.val));
5370
5371        ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5372        if (!fop) /* exception is returned in fop variable */
5373                return emulate_de(ctxt);
5374        return X86EMUL_CONTINUE;
5375}
5376
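/*
 * Reset the per-instruction decode state.  Everything between ->rip_relative
 * and ->modrm in struct x86_emulate_ctxt is cleared in one go; the I/O and
 * memory read-ahead caches are reset separately.
 */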
5377void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5378{
5379        memset(&ctxt->rip_relative, 0,
5380               (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
5381
5382        ctxt->io_read.pos = 0;
5383        ctxt->io_read.end = 0;
5384        ctxt->mem_read.end = 0;
5385}
5386
5387int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5388{
5389        const struct x86_emulate_ops *ops = ctxt->ops;
5390        int rc = X86EMUL_CONTINUE;
5391        int saved_dst_type = ctxt->dst.type;
5392        unsigned emul_flags;
5393
5394        ctxt->mem_read.pos = 0;
5395
5396        /* LOCK prefix is allowed only with some instructions */
5397        if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5398                rc = emulate_ud(ctxt);
5399                goto done;
5400        }
5401
5402        if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5403                rc = emulate_ud(ctxt);
5404                goto done;
5405        }
5406
5407        emul_flags = ctxt->ops->get_hflags(ctxt);
5408        if (unlikely(ctxt->d &
5409                     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5410                if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5411                                (ctxt->d & Undefined)) {
5412                        rc = emulate_ud(ctxt);
5413                        goto done;
5414                }
5415
5416                if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5417                    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5418                        rc = emulate_ud(ctxt);
5419                        goto done;
5420                }
5421
5422                if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5423                        rc = emulate_nm(ctxt);
5424                        goto done;
5425                }
5426
5427                if (ctxt->d & Mmx) {
5428                        rc = flush_pending_x87_faults(ctxt);
5429                        if (rc != X86EMUL_CONTINUE)
5430                                goto done;
5431                        /*
5432                         * Now that we know the fpu is exception safe, we can fetch
5433                         * operands from it.
5434                         */
5435                        fetch_possible_mmx_operand(&ctxt->src);
5436                        fetch_possible_mmx_operand(&ctxt->src2);
5437                        if (!(ctxt->d & Mov))
5438                                fetch_possible_mmx_operand(&ctxt->dst);
5439                }
5440
5441                if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
5442                        rc = emulator_check_intercept(ctxt, ctxt->intercept,
5443                                                      X86_ICPT_PRE_EXCEPT);
5444                        if (rc != X86EMUL_CONTINUE)
5445                                goto done;
5446                }
5447
5448                /* Instruction can only be executed in protected mode */
5449                if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5450                        rc = emulate_ud(ctxt);
5451                        goto done;
5452                }
5453
5454                /* Privileged instruction can be executed only in CPL=0 */
5455                if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5456                        if (ctxt->d & PrivUD)
5457                                rc = emulate_ud(ctxt);
5458                        else
5459                                rc = emulate_gp(ctxt, 0);
5460                        goto done;
5461                }
5462
5463                /* Do instruction specific permission checks */
5464                if (ctxt->d & CheckPerm) {
5465                        rc = ctxt->check_perm(ctxt);
5466                        if (rc != X86EMUL_CONTINUE)
5467                                goto done;
5468                }
5469
5470                if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5471                        rc = emulator_check_intercept(ctxt, ctxt->intercept,
5472                                                      X86_ICPT_POST_EXCEPT);
5473                        if (rc != X86EMUL_CONTINUE)
5474                                goto done;
5475                }
5476
5477                if (ctxt->rep_prefix && (ctxt->d & String)) {
5478                        /* All REP prefixes have the same first termination condition */
5479                        if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5480                                string_registers_quirk(ctxt);
5481                                ctxt->eip = ctxt->_eip;
5482                                ctxt->eflags &= ~X86_EFLAGS_RF;
5483                                goto done;
5484                        }
5485                }
5486        }
5487
5488        if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5489                rc = segmented_read(ctxt, ctxt->src.addr.mem,
5490                                    ctxt->src.valptr, ctxt->src.bytes);
5491                if (rc != X86EMUL_CONTINUE)
5492                        goto done;
5493                ctxt->src.orig_val64 = ctxt->src.val64;
5494        }
5495
5496        if (ctxt->src2.type == OP_MEM) {
5497                rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5498                                    &ctxt->src2.val, ctxt->src2.bytes);
5499                if (rc != X86EMUL_CONTINUE)
5500                        goto done;
5501        }
5502
5503        if ((ctxt->d & DstMask) == ImplicitOps)
5504                goto special_insn;
5505
5506
5507        if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5508                /* optimisation - avoid slow emulated read if Mov */
5509                rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5510                                   &ctxt->dst.val, ctxt->dst.bytes);
5511                if (rc != X86EMUL_CONTINUE) {
5512                        if (!(ctxt->d & NoWrite) &&
5513                            rc == X86EMUL_PROPAGATE_FAULT &&
5514                            ctxt->exception.vector == PF_VECTOR)
5515                                ctxt->exception.error_code |= PFERR_WRITE_MASK;
5516                        goto done;
5517                }
5518        }
5519        /* Copy full 64-bit value for CMPXCHG8B.  */
5520        ctxt->dst.orig_val64 = ctxt->dst.val64;
5521
5522special_insn:
5523
5524        if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5525                rc = emulator_check_intercept(ctxt, ctxt->intercept,
5526                                              X86_ICPT_POST_MEMACCESS);
5527                if (rc != X86EMUL_CONTINUE)
5528                        goto done;
5529        }
5530
5531        if (ctxt->rep_prefix && (ctxt->d & String))
5532                ctxt->eflags |= X86_EFLAGS_RF;
5533        else
5534                ctxt->eflags &= ~X86_EFLAGS_RF;
5535
5536        if (ctxt->execute) {
5537                if (ctxt->d & Fastop)
5538                        rc = fastop(ctxt, ctxt->fop);
5539                else
5540                        rc = ctxt->execute(ctxt);
5541                if (rc != X86EMUL_CONTINUE)
5542                        goto done;
5543                goto writeback;
5544        }
5545
5546        if (ctxt->opcode_len == 2)
5547                goto twobyte_insn;
5548        else if (ctxt->opcode_len == 3)
5549                goto threebyte_insn;
5550
5551        switch (ctxt->b) {
5552        case 0x70 ... 0x7f: /* jcc (short) */
5553                if (test_cc(ctxt->b, ctxt->eflags))
5554                        rc = jmp_rel(ctxt, ctxt->src.val);
5555                break;
5556        case 0x8d: /* lea r16/r32, m */
5557                ctxt->dst.val = ctxt->src.addr.mem.ea;
5558                break;
5559        case 0x90 ... 0x97: /* nop / xchg reg, rax */
5560                if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5561                        ctxt->dst.type = OP_NONE;
5562                else
5563                        rc = em_xchg(ctxt);
5564                break;
5565        case 0x98: /* cbw/cwde/cdqe */
5566                switch (ctxt->op_bytes) {
5567                case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5568                case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5569                case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5570                }
5571                break;
5572        case 0xcc:              /* int3 */
5573                rc = emulate_int(ctxt, 3);
5574                break;
5575        case 0xcd:              /* int n */
5576                rc = emulate_int(ctxt, ctxt->src.val);
5577                break;
5578        case 0xce:              /* into */
5579                if (ctxt->eflags & X86_EFLAGS_OF)
5580                        rc = emulate_int(ctxt, 4);
5581                break;
5582        case 0xe9: /* jmp rel */
5583        case 0xeb: /* jmp rel short */
5584                rc = jmp_rel(ctxt, ctxt->src.val);
5585                ctxt->dst.type = OP_NONE; /* Disable writeback. */
5586                break;
5587        case 0xf4:              /* hlt */
5588                ctxt->ops->halt(ctxt);
5589                break;
5590        case 0xf5:      /* cmc */
5591                /* complement carry flag from eflags reg */
5592                ctxt->eflags ^= X86_EFLAGS_CF;
5593                break;
5594        case 0xf8: /* clc */
5595                ctxt->eflags &= ~X86_EFLAGS_CF;
5596                break;
5597        case 0xf9: /* stc */
5598                ctxt->eflags |= X86_EFLAGS_CF;
5599                break;
5600        case 0xfc: /* cld */
5601                ctxt->eflags &= ~X86_EFLAGS_DF;
5602                break;
5603        case 0xfd: /* std */
5604                ctxt->eflags |= X86_EFLAGS_DF;
5605                break;
5606        default:
5607                goto cannot_emulate;
5608        }
5609
5610        if (rc != X86EMUL_CONTINUE)
5611                goto done;
5612
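/*
 * Commit the results: write back the source operand for SrcWrite instructions
 * (e.g. XADD, CMPXCHG) and the destination unless NoWrite, then handle string
 * instruction bookkeeping before advancing RIP.
 */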
5613writeback:
5614        if (ctxt->d & SrcWrite) {
5615                BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5616                rc = writeback(ctxt, &ctxt->src);
5617                if (rc != X86EMUL_CONTINUE)
5618                        goto done;
5619        }
5620        if (!(ctxt->d & NoWrite)) {
5621                rc = writeback(ctxt, &ctxt->dst);
5622                if (rc != X86EMUL_CONTINUE)
5623                        goto done;
5624        }
5625
5626        /*
5627         * Restore dst type in case the decode is reused
5628         * (happens for string instructions).
5629         */
5630        ctxt->dst.type = saved_dst_type;
5631
5632        if ((ctxt->d & SrcMask) == SrcSI)
5633                string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5634
5635        if ((ctxt->d & DstMask) == DstDI)
5636                string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5637
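        /*
         * A REP-prefixed string instruction is executed one iteration at a
         * time: decrement RCX and, unless the loop has terminated, leave RIP
         * untouched so the instruction runs again.
         */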
5638        if (ctxt->rep_prefix && (ctxt->d & String)) {
5639                unsigned int count;
5640                struct read_cache *r = &ctxt->io_read;
5641                if ((ctxt->d & SrcMask) == SrcSI)
5642                        count = ctxt->src.count;
5643                else
5644                        count = ctxt->dst.count;
5645                register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5646
5647                if (!string_insn_completed(ctxt)) {
5648                        /*
5649                         * Re-enter the guest when the PIO read-ahead buffer
5650                         * is empty or, if it is not used, after every 1024 iterations.
5651                         */
5652                        if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5653                            (r->end == 0 || r->end != r->pos)) {
5654                                /*
5655                                 * Reset read cache. Usually happens before
5656                                 * decode, but since instruction is restarted
5657                                 * we have to do it here.
5658                                 */
5659                                ctxt->mem_read.end = 0;
5660                                writeback_registers(ctxt);
5661                                return EMULATION_RESTART;
5662                        }
5663                        goto done; /* skip rip writeback */
5664                }
5665                ctxt->eflags &= ~X86_EFLAGS_RF;
5666        }
5667
5668        ctxt->eip = ctxt->_eip;
5669        if (ctxt->mode != X86EMUL_MODE_PROT64)
5670                ctxt->eip = (u32)ctxt->_eip;
5671
5672done:
5673        if (rc == X86EMUL_PROPAGATE_FAULT) {
5674                WARN_ON(ctxt->exception.vector > 0x1f);
5675                ctxt->have_exception = true;
5676        }
5677        if (rc == X86EMUL_INTERCEPTED)
5678                return EMULATION_INTERCEPTED;
5679
5680        if (rc == X86EMUL_CONTINUE)
5681                writeback_registers(ctxt);
5682
5683        return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5684
5685twobyte_insn:
5686        switch (ctxt->b) {
5687        case 0x09:              /* wbinvd */
5688                (ctxt->ops->wbinvd)(ctxt);
5689                break;
5690        case 0x08:              /* invd */
5691        case 0x0d:              /* GrpP (prefetch) */
5692        case 0x18:              /* Grp16 (prefetch/nop) */
5693        case 0x1f:              /* nop */
5694                break;
5695        case 0x20: /* mov cr, reg */
5696                ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5697                break;
5698        case 0x21: /* mov from dr to reg */
5699                ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5700                break;
5701        case 0x40 ... 0x4f:     /* cmov */
5702                if (test_cc(ctxt->b, ctxt->eflags))
5703                        ctxt->dst.val = ctxt->src.val;
5704                else if (ctxt->op_bytes != 4)
5705                        ctxt->dst.type = OP_NONE; /* no writeback */
5706                break;
5707        case 0x80 ... 0x8f: /* jcc rel, etc. */
5708                if (test_cc(ctxt->b, ctxt->eflags))
5709                        rc = jmp_rel(ctxt, ctxt->src.val);
5710                break;
5711        case 0x90 ... 0x9f:     /* setcc r/m8 */
5712                ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5713                break;
5714        case 0xb6 ... 0xb7:     /* movzx */
5715                ctxt->dst.bytes = ctxt->op_bytes;
5716                ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5717                                                       : (u16) ctxt->src.val;
5718                break;
5719        case 0xbe ... 0xbf:     /* movsx */
5720                ctxt->dst.bytes = ctxt->op_bytes;
5721                ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5722                                                        (s16) ctxt->src.val;
5723                break;
5724        default:
5725                goto cannot_emulate;
5726        }
5727
5728threebyte_insn:
5729
5730        if (rc != X86EMUL_CONTINUE)
5731                goto done;
5732
5733        goto writeback;
5734
5735cannot_emulate:
5736        return EMULATION_FAILED;
5737}
5738
5739void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5740{
5741        invalidate_registers(ctxt);
5742}
5743
5744void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5745{
5746        writeback_registers(ctxt);
5747}
5748
5749bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
5750{
5751        if (ctxt->rep_prefix && (ctxt->d & String))
5752                return false;
5753
5754        if (ctxt->d & TwoMemOp)
5755                return false;
5756
5757        return true;
5758}
5759