qemu/hw/i386/kvmvapic.c
<<
>>
Prefs
   1/*
   2 * TPR optimization for 32-bit Windows guests (XP and Server 2003)
   3 *
   4 * Copyright (C) 2007-2008 Qumranet Technologies
   5 * Copyright (C) 2012      Jan Kiszka, Siemens AG
   6 *
   7 * This work is licensed under the terms of the GNU GPL version 2, or
   8 * (at your option) any later version. See the COPYING file in the
   9 * top-level directory.
  10 */
  11
  12#include "qemu/osdep.h"
  13#include "qemu/module.h"
  14#include "cpu.h"
  15#include "sysemu/sysemu.h"
  16#include "sysemu/cpus.h"
  17#include "sysemu/hw_accel.h"
  18#include "sysemu/kvm.h"
  19#include "hw/i386/apic_internal.h"
  20#include "hw/sysbus.h"
  21#include "hw/boards.h"
  22#include "tcg/tcg.h"
  23
  24#define VAPIC_IO_PORT           0x7e
  25
  26#define VAPIC_CPU_SHIFT         7
  27
  28#define ROM_BLOCK_SIZE          512
  29#define ROM_BLOCK_MASK          (~(ROM_BLOCK_SIZE - 1))
  30
  31typedef enum VAPICMode {
  32    VAPIC_INACTIVE = 0,
  33    VAPIC_ACTIVE   = 1,
  34    VAPIC_STANDBY  = 2,
  35} VAPICMode;
  36
  37typedef struct VAPICHandlers {
  38    uint32_t set_tpr;
  39    uint32_t set_tpr_eax;
  40    uint32_t get_tpr[8];
  41    uint32_t get_tpr_stack;
  42} QEMU_PACKED VAPICHandlers;
  43
  44typedef struct GuestROMState {
  45    char signature[8];
  46    uint32_t vaddr;
  47    uint32_t fixup_start;
  48    uint32_t fixup_end;
  49    uint32_t vapic_vaddr;
  50    uint32_t vapic_size;
  51    uint32_t vcpu_shift;
  52    uint32_t real_tpr_addr;
  53    VAPICHandlers up;
  54    VAPICHandlers mp;
  55} QEMU_PACKED GuestROMState;
  56
  57typedef struct VAPICROMState {
  58    SysBusDevice busdev;
  59    MemoryRegion io;
  60    MemoryRegion rom;
  61    uint32_t state;
  62    uint32_t rom_state_paddr;
  63    uint32_t rom_state_vaddr;
  64    uint32_t vapic_paddr;
  65    uint32_t real_tpr_addr;
  66    GuestROMState rom_state;
  67    size_t rom_size;
  68    bool rom_mapped_writable;
  69    VMChangeStateEntry *vmsentry;
  70} VAPICROMState;
  71
  72#define TYPE_VAPIC "kvmvapic"
  73#define VAPIC(obj) OBJECT_CHECK(VAPICROMState, (obj), TYPE_VAPIC)
  74
  75#define TPR_INSTR_ABS_MODRM             0x1
  76#define TPR_INSTR_MATCH_MODRM_REG       0x2
  77
  78typedef struct TPRInstruction {
  79    uint8_t opcode;
  80    uint8_t modrm_reg;
  81    unsigned int flags;
  82    TPRAccess access;
  83    size_t length;
  84    off_t addr_offset;
  85} TPRInstruction;
  86
  87/* must be sorted by length, shortest first */
  88static const TPRInstruction tpr_instr[] = {
  89    { /* mov abs to eax */
  90        .opcode = 0xa1,
  91        .access = TPR_ACCESS_READ,
  92        .length = 5,
  93        .addr_offset = 1,
  94    },
  95    { /* mov eax to abs */
  96        .opcode = 0xa3,
  97        .access = TPR_ACCESS_WRITE,
  98        .length = 5,
  99        .addr_offset = 1,
 100    },
 101    { /* mov r32 to r/m32 */
 102        .opcode = 0x89,
 103        .flags = TPR_INSTR_ABS_MODRM,
 104        .access = TPR_ACCESS_WRITE,
 105        .length = 6,
 106        .addr_offset = 2,
 107    },
 108    { /* mov r/m32 to r32 */
 109        .opcode = 0x8b,
 110        .flags = TPR_INSTR_ABS_MODRM,
 111        .access = TPR_ACCESS_READ,
 112        .length = 6,
 113        .addr_offset = 2,
 114    },
 115    { /* push r/m32 */
 116        .opcode = 0xff,
 117        .modrm_reg = 6,
 118        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
 119        .access = TPR_ACCESS_READ,
 120        .length = 6,
 121        .addr_offset = 2,
 122    },
 123    { /* mov imm32, r/m32 (c7/0) */
 124        .opcode = 0xc7,
 125        .modrm_reg = 0,
 126        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
 127        .access = TPR_ACCESS_WRITE,
 128        .length = 10,
 129        .addr_offset = 2,
 130    },
 131};
 132
 133static void read_guest_rom_state(VAPICROMState *s)
 134{
 135    cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state,
 136                             sizeof(GuestROMState));
 137}
 138
 139static void write_guest_rom_state(VAPICROMState *s)
 140{
 141    cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state,
 142                              sizeof(GuestROMState));
 143}
 144
 145static void update_guest_rom_state(VAPICROMState *s)
 146{
 147    read_guest_rom_state(s);
 148
 149    s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr);
 150    s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT);
 151
 152    write_guest_rom_state(s);
 153}
 154
 155static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env)
 156{
 157    CPUState *cs = env_cpu(env);
 158    hwaddr paddr;
 159    target_ulong addr;
 160
 161    if (s->state == VAPIC_ACTIVE) {
 162        return 0;
 163    }
 164    /*
 165     * If there is no prior TPR access instruction we could analyze (which is
 166     * the case after resume from hibernation), we need to scan the possible
 167     * virtual address space for the APIC mapping.
 168     */
 169    for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) {
 170        paddr = cpu_get_phys_page_debug(cs, addr);
 171        if (paddr != APIC_DEFAULT_ADDRESS) {
 172            continue;
 173        }
 174        s->real_tpr_addr = addr + 0x80;
 175        update_guest_rom_state(s);
 176        return 0;
 177    }
 178    return -1;
 179}
 180
 181static uint8_t modrm_reg(uint8_t modrm)
 182{
 183    return (modrm >> 3) & 7;
 184}
 185
 186static bool is_abs_modrm(uint8_t modrm)
 187{
 188    return (modrm & 0xc7) == 0x05;
 189}
 190
 191static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr)
 192{
 193    return opcode[0] == instr->opcode &&
 194        (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) &&
 195        (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) ||
 196         modrm_reg(opcode[1]) == instr->modrm_reg);
 197}
 198
 199static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu,
 200                                    target_ulong *pip, TPRAccess access)
 201{
 202    CPUState *cs = CPU(cpu);
 203    const TPRInstruction *instr;
 204    target_ulong ip = *pip;
 205    uint8_t opcode[2];
 206    uint32_t real_tpr_addr;
 207    int i;
 208
 209    if ((ip & 0xf0000000ULL) != 0x80000000ULL &&
 210        (ip & 0xf0000000ULL) != 0xe0000000ULL) {
 211        return -1;
 212    }
 213
 214    /*
 215     * Early Windows 2003 SMP initialization contains a
 216     *
 217     *   mov imm32, r/m32
 218     *
 219     * instruction that is patched by TPR optimization. The problem is that
 220     * RSP, used by the patched instruction, is zero, so the guest gets a
 221     * double fault and dies.
 222     */
 223    if (cpu->env.regs[R_ESP] == 0) {
 224        return -1;
 225    }
 226
 227    if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
 228        /*
 229         * KVM without kernel-based TPR access reporting will pass an IP that
 230         * points after the accessing instruction. So we need to look backward
 231         * to find the reason.
 232         */
 233        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
 234            instr = &tpr_instr[i];
 235            if (instr->access != access) {
 236                continue;
 237            }
 238            if (cpu_memory_rw_debug(cs, ip - instr->length, opcode,
 239                                    sizeof(opcode), 0) < 0) {
 240                return -1;
 241            }
 242            if (opcode_matches(opcode, instr)) {
 243                ip -= instr->length;
 244                goto instruction_ok;
 245            }
 246        }
 247        return -1;
 248    } else {
 249        if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) {
 250            return -1;
 251        }
 252        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
 253            instr = &tpr_instr[i];
 254            if (opcode_matches(opcode, instr)) {
 255                goto instruction_ok;
 256            }
 257        }
 258        return -1;
 259    }
 260
 261instruction_ok:
 262    /*
 263     * Grab the virtual TPR address from the instruction
 264     * and update the cached values.
 265     */
 266    if (cpu_memory_rw_debug(cs, ip + instr->addr_offset,
 267                            (void *)&real_tpr_addr,
 268                            sizeof(real_tpr_addr), 0) < 0) {
 269        return -1;
 270    }
 271    real_tpr_addr = le32_to_cpu(real_tpr_addr);
 272    if ((real_tpr_addr & 0xfff) != 0x80) {
 273        return -1;
 274    }
 275    s->real_tpr_addr = real_tpr_addr;
 276    update_guest_rom_state(s);
 277
 278    *pip = ip;
 279    return 0;
 280}
 281
 282static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip)
 283{
 284    CPUState *cs = env_cpu(env);
 285    hwaddr paddr;
 286    uint32_t rom_state_vaddr;
 287    uint32_t pos, patch, offset;
 288
 289    /* nothing to do if already activated */
 290    if (s->state == VAPIC_ACTIVE) {
 291        return 0;
 292    }
 293
 294    /* bail out if ROM init code was not executed (missing ROM?) */
 295    if (s->state == VAPIC_INACTIVE) {
 296        return -1;
 297    }
 298
 299    /* find out virtual address of the ROM */
 300    rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000);
 301    paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr);
 302    if (paddr == -1) {
 303        return -1;
 304    }
 305    paddr += rom_state_vaddr & ~TARGET_PAGE_MASK;
 306    if (paddr != s->rom_state_paddr) {
 307        return -1;
 308    }
 309    read_guest_rom_state(s);
 310    if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) {
 311        return -1;
 312    }
 313    s->rom_state_vaddr = rom_state_vaddr;
 314
 315    /* fixup addresses in ROM if needed */
 316    if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) {
 317        return 0;
 318    }
 319    for (pos = le32_to_cpu(s->rom_state.fixup_start);
 320         pos < le32_to_cpu(s->rom_state.fixup_end);
 321         pos += 4) {
 322        cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr,
 323                                 &offset, sizeof(offset));
 324        offset = le32_to_cpu(offset);
 325        cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch));
 326        patch = le32_to_cpu(patch);
 327        patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr);
 328        patch = cpu_to_le32(patch);
 329        cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch));
 330    }
 331    read_guest_rom_state(s);
 332    s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) -
 333        le32_to_cpu(s->rom_state.vaddr);
 334
 335    return 0;
 336}
 337
 338/*
 339 * Tries to read the unique processor number from the Kernel Processor Control
 340 * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR
 341 * cannot be accessed or is considered invalid. This also ensures that we are
 342 * not patching the wrong guest.
 343 */
 344static int get_kpcr_number(X86CPU *cpu)
 345{
 346    CPUX86State *env = &cpu->env;
 347    struct kpcr {
 348        uint8_t  fill1[0x1c];
 349        uint32_t self;
 350        uint8_t  fill2[0x31];
 351        uint8_t  number;
 352    } QEMU_PACKED kpcr;
 353
 354    if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base,
 355                            (void *)&kpcr, sizeof(kpcr), 0) < 0 ||
 356        kpcr.self != env->segs[R_FS].base) {
 357        return -1;
 358    }
 359    return kpcr.number;
 360}
 361
 362static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
 363{
 364    int cpu_number = get_kpcr_number(cpu);
 365    hwaddr vapic_paddr;
 366    static const uint8_t enabled = 1;
 367
 368    if (cpu_number < 0) {
 369        return -1;
 370    }
 371    vapic_paddr = s->vapic_paddr +
 372        (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
 373    cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled),
 374                              &enabled, sizeof(enabled));
 375    apic_enable_vapic(cpu->apic_state, vapic_paddr);
 376
 377    s->state = VAPIC_ACTIVE;
 378
 379    return 0;
 380}
 381
 382static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte)
 383{
 384    cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1);
 385}
 386
 387static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target)
 388{
 389    uint32_t offset;
 390
 391    offset = cpu_to_le32(target - ip - 5);
 392    patch_byte(cpu, ip, 0xe8); /* call near */
 393    cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1);
 394}
 395
 396typedef struct PatchInfo {
 397    VAPICHandlers *handler;
 398    target_ulong ip;
 399} PatchInfo;
 400
 401static void do_patch_instruction(CPUState *cs, run_on_cpu_data data)
 402{
 403    X86CPU *x86_cpu = X86_CPU(cs);
 404    PatchInfo *info = (PatchInfo *) data.host_ptr;
 405    VAPICHandlers *handlers = info->handler;
 406    target_ulong ip = info->ip;
 407    uint8_t opcode[2];
 408    uint32_t imm32 = 0;
 409
 410    cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0);
 411
 412    switch (opcode[0]) {
 413    case 0x89: /* mov r32 to r/m32 */
 414        patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1]));  /* push reg */
 415        patch_call(x86_cpu, ip + 1, handlers->set_tpr);
 416        break;
 417    case 0x8b: /* mov r/m32 to r32 */
 418        patch_byte(x86_cpu, ip, 0x90);
 419        patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]);
 420        break;
 421    case 0xa1: /* mov abs to eax */
 422        patch_call(x86_cpu, ip, handlers->get_tpr[0]);
 423        break;
 424    case 0xa3: /* mov eax to abs */
 425        patch_call(x86_cpu, ip, handlers->set_tpr_eax);
 426        break;
 427    case 0xc7: /* mov imm32, r/m32 (c7/0) */
 428        patch_byte(x86_cpu, ip, 0x68);  /* push imm32 */
 429        cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0);
 430        cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1);
 431        patch_call(x86_cpu, ip + 5, handlers->set_tpr);
 432        break;
 433    case 0xff: /* push r/m32 */
 434        patch_byte(x86_cpu, ip, 0x50); /* push eax */
 435        patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack);
 436        break;
 437    default:
 438        abort();
 439    }
 440
 441    g_free(info);
 442}
 443
 444static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
 445{
 446    MachineState *ms = MACHINE(qdev_get_machine());
 447    CPUState *cs = CPU(cpu);
 448    VAPICHandlers *handlers;
 449    PatchInfo *info;
 450
 451    if (ms->smp.cpus == 1) {
 452        handlers = &s->rom_state.up;
 453    } else {
 454        handlers = &s->rom_state.mp;
 455    }
 456
 457    info  = g_new(PatchInfo, 1);
 458    info->handler = handlers;
 459    info->ip = ip;
 460
 461    async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info));
 462}
 463
 464void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip,
 465                             TPRAccess access)
 466{
 467    VAPICROMState *s = VAPIC(dev);
 468    X86CPU *cpu = X86_CPU(cs);
 469    CPUX86State *env = &cpu->env;
 470
 471    cpu_synchronize_state(cs);
 472
 473    if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) {
 474        if (s->state == VAPIC_ACTIVE) {
 475            vapic_enable(s, cpu);
 476        }
 477        return;
 478    }
 479    if (update_rom_mapping(s, env, ip) < 0) {
 480        return;
 481    }
 482    if (vapic_enable(s, cpu) < 0) {
 483        return;
 484    }
 485    patch_instruction(s, cpu, ip);
 486}
 487
 488typedef struct VAPICEnableTPRReporting {
 489    DeviceState *apic;
 490    bool enable;
 491} VAPICEnableTPRReporting;
 492
 493static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
 494{
 495    VAPICEnableTPRReporting *info = data.host_ptr;
 496    apic_enable_tpr_access_reporting(info->apic, info->enable);
 497}
 498
 499static void vapic_enable_tpr_reporting(bool enable)
 500{
 501    VAPICEnableTPRReporting info = {
 502        .enable = enable,
 503    };
 504    CPUState *cs;
 505    X86CPU *cpu;
 506
 507    CPU_FOREACH(cs) {
 508        cpu = X86_CPU(cs);
 509        info.apic = cpu->apic_state;
 510        run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
 511    }
 512}
 513
 514static void vapic_reset(DeviceState *dev)
 515{
 516    VAPICROMState *s = VAPIC(dev);
 517
 518    s->state = VAPIC_INACTIVE;
 519    s->rom_state_paddr = 0;
 520    vapic_enable_tpr_reporting(false);
 521}
 522
 523/*
 524 * Set the IRQ polling hypercalls to the supported variant:
 525 *  - vmcall if using KVM in-kernel irqchip
 526 *  - 32-bit VAPIC port write otherwise
 527 */
 528static int patch_hypercalls(VAPICROMState *s)
 529{
 530    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
 531    static const uint8_t vmcall_pattern[] = { /* vmcall */
 532        0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1
 533    };
 534    static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */
 535        0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e
 536    };
 537    uint8_t alternates[2];
 538    const uint8_t *pattern;
 539    const uint8_t *patch;
 540    off_t pos;
 541    uint8_t *rom;
 542
 543    rom = g_malloc(s->rom_size);
 544    cpu_physical_memory_read(rom_paddr, rom, s->rom_size);
 545
 546    for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) {
 547        if (kvm_irqchip_in_kernel()) {
 548            pattern = outl_pattern;
 549            alternates[0] = outl_pattern[7];
 550            alternates[1] = outl_pattern[7];
 551            patch = &vmcall_pattern[5];
 552        } else {
 553            pattern = vmcall_pattern;
 554            alternates[0] = vmcall_pattern[7];
 555            alternates[1] = 0xd9; /* AMD's VMMCALL */
 556            patch = &outl_pattern[5];
 557        }
 558        if (memcmp(rom + pos, pattern, 7) == 0 &&
 559            (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) {
 560            cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3);
 561            /*
 562             * Don't flush the tb here. Under ordinary conditions, the patched
 563             * calls are miles away from the current IP. Under malicious
 564             * conditions, the guest could trick us to crash.
 565             */
 566        }
 567    }
 568
 569    g_free(rom);
 570    return 0;
 571}
 572
 573/*
 574 * For TCG mode or the time KVM honors read-only memory regions, we need to
 575 * enable write access to the option ROM so that variables can be updated by
 576 * the guest.
 577 */
 578static int vapic_map_rom_writable(VAPICROMState *s)
 579{
 580    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
 581    MemoryRegionSection section;
 582    MemoryRegion *as;
 583    size_t rom_size;
 584    uint8_t *ram;
 585
 586    as = sysbus_address_space(&s->busdev);
 587
 588    if (s->rom_mapped_writable) {
 589        memory_region_del_subregion(as, &s->rom);
 590        object_unparent(OBJECT(&s->rom));
 591    }
 592
 593    /* grab RAM memory region (region @rom_paddr may still be pc.rom) */
 594    section = memory_region_find(as, 0, 1);
 595
 596    /* read ROM size from RAM region */
 597    if (rom_paddr + 2 >= memory_region_size(section.mr)) {
 598        return -1;
 599    }
 600    ram = memory_region_get_ram_ptr(section.mr);
 601    rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE;
 602    if (rom_size == 0) {
 603        return -1;
 604    }
 605    s->rom_size = rom_size;
 606
 607    /* We need to round to avoid creating subpages
 608     * from which we cannot run code. */
 609    rom_size += rom_paddr & ~TARGET_PAGE_MASK;
 610    rom_paddr &= TARGET_PAGE_MASK;
 611    rom_size = TARGET_PAGE_ALIGN(rom_size);
 612
 613    memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr,
 614                             rom_paddr, rom_size);
 615    memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000);
 616    s->rom_mapped_writable = true;
 617    memory_region_unref(section.mr);
 618
 619    return 0;
 620}
 621
 622static int vapic_prepare(VAPICROMState *s)
 623{
 624    if (vapic_map_rom_writable(s) < 0) {
 625        return -1;
 626    }
 627
 628    if (patch_hypercalls(s) < 0) {
 629        return -1;
 630    }
 631
 632    vapic_enable_tpr_reporting(true);
 633
 634    return 0;
 635}
 636
 637static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
 638                        unsigned int size)
 639{
 640    VAPICROMState *s = opaque;
 641    X86CPU *cpu;
 642    CPUX86State *env;
 643    hwaddr rom_paddr;
 644
 645    if (!current_cpu) {
 646        return;
 647    }
 648
 649    cpu_synchronize_state(current_cpu);
 650    cpu = X86_CPU(current_cpu);
 651    env = &cpu->env;
 652
 653    /*
 654     * The VAPIC supports two PIO-based hypercalls, both via port 0x7E.
 655     *  o 16-bit write access:
 656     *    Reports the option ROM initialization to the hypervisor. Written
 657     *    value is the offset of the state structure in the ROM.
 658     *  o 8-bit write access:
 659     *    Reactivates the VAPIC after a guest hibernation, i.e. after the
 660     *    option ROM content has been re-initialized by a guest power cycle.
 661     *  o 32-bit write access:
 662     *    Poll for pending IRQs, considering the current VAPIC state.
 663     */
 664    switch (size) {
 665    case 2:
 666        if (s->state == VAPIC_INACTIVE) {
 667            rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK;
 668            s->rom_state_paddr = rom_paddr + data;
 669
 670            s->state = VAPIC_STANDBY;
 671        }
 672        if (vapic_prepare(s) < 0) {
 673            s->state = VAPIC_INACTIVE;
 674            s->rom_state_paddr = 0;
 675            break;
 676        }
 677        break;
 678    case 1:
 679        if (kvm_enabled()) {
 680            /*
 681             * Disable triggering instruction in ROM by writing a NOP.
 682             *
 683             * We cannot do this in TCG mode as the reported IP is not
 684             * accurate.
 685             */
 686            pause_all_vcpus();
 687            patch_byte(cpu, env->eip - 2, 0x66);
 688            patch_byte(cpu, env->eip - 1, 0x90);
 689            resume_all_vcpus();
 690        }
 691
 692        if (s->state == VAPIC_ACTIVE) {
 693            break;
 694        }
 695        if (update_rom_mapping(s, env, env->eip) < 0) {
 696            break;
 697        }
 698        if (find_real_tpr_addr(s, env) < 0) {
 699            break;
 700        }
 701        vapic_enable(s, cpu);
 702        break;
 703    default:
 704    case 4:
 705        if (!kvm_irqchip_in_kernel()) {
 706            apic_poll_irq(cpu->apic_state);
 707        }
 708        break;
 709    }
 710}
 711
 712static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
 713{
 714    return 0xffffffff;
 715}
 716
 717static const MemoryRegionOps vapic_ops = {
 718    .write = vapic_write,
 719    .read = vapic_read,
 720    .endianness = DEVICE_NATIVE_ENDIAN,
 721};
 722
 723static void vapic_realize(DeviceState *dev, Error **errp)
 724{
 725    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 726    VAPICROMState *s = VAPIC(dev);
 727
 728    memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2);
 729    sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io);
 730    sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2);
 731
 732    option_rom[nb_option_roms].name = "kvmvapic.bin";
 733    option_rom[nb_option_roms].bootindex = -1;
 734    nb_option_roms++;
 735}
 736
 737static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
 738{
 739    VAPICROMState *s = data.host_ptr;
 740    X86CPU *cpu = X86_CPU(cs);
 741
 742    static const uint8_t enabled = 1;
 743    cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
 744                              &enabled, sizeof(enabled));
 745    apic_enable_vapic(cpu->apic_state, s->vapic_paddr);
 746    s->state = VAPIC_ACTIVE;
 747}
 748
 749static void kvmvapic_vm_state_change(void *opaque, int running,
 750                                     RunState state)
 751{
 752    MachineState *ms = MACHINE(qdev_get_machine());
 753    VAPICROMState *s = opaque;
 754    uint8_t *zero;
 755
 756    if (!running) {
 757        return;
 758    }
 759
 760    if (s->state == VAPIC_ACTIVE) {
 761        if (ms->smp.cpus == 1) {
 762            run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
 763        } else {
 764            zero = g_malloc0(s->rom_state.vapic_size);
 765            cpu_physical_memory_write(s->vapic_paddr, zero,
 766                                      s->rom_state.vapic_size);
 767            g_free(zero);
 768        }
 769    }
 770
 771    qemu_del_vm_change_state_handler(s->vmsentry);
 772    s->vmsentry = NULL;
 773}
 774
 775static int vapic_post_load(void *opaque, int version_id)
 776{
 777    VAPICROMState *s = opaque;
 778
 779    /*
 780     * The old implementation of qemu-kvm did not provide the state
 781     * VAPIC_STANDBY. Reconstruct it.
 782     */
 783    if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) {
 784        s->state = VAPIC_STANDBY;
 785    }
 786
 787    if (s->state != VAPIC_INACTIVE) {
 788        if (vapic_prepare(s) < 0) {
 789            return -1;
 790        }
 791    }
 792
 793    if (!s->vmsentry) {
 794        s->vmsentry =
 795            qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s);
 796    }
 797    return 0;
 798}
 799
 800static const VMStateDescription vmstate_handlers = {
 801    .name = "kvmvapic-handlers",
 802    .version_id = 1,
 803    .minimum_version_id = 1,
 804    .fields = (VMStateField[]) {
 805        VMSTATE_UINT32(set_tpr, VAPICHandlers),
 806        VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
 807        VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
 808        VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
 809        VMSTATE_END_OF_LIST()
 810    }
 811};
 812
 813static const VMStateDescription vmstate_guest_rom = {
 814    .name = "kvmvapic-guest-rom",
 815    .version_id = 1,
 816    .minimum_version_id = 1,
 817    .fields = (VMStateField[]) {
 818        VMSTATE_UNUSED(8),     /* signature */
 819        VMSTATE_UINT32(vaddr, GuestROMState),
 820        VMSTATE_UINT32(fixup_start, GuestROMState),
 821        VMSTATE_UINT32(fixup_end, GuestROMState),
 822        VMSTATE_UINT32(vapic_vaddr, GuestROMState),
 823        VMSTATE_UINT32(vapic_size, GuestROMState),
 824        VMSTATE_UINT32(vcpu_shift, GuestROMState),
 825        VMSTATE_UINT32(real_tpr_addr, GuestROMState),
 826        VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
 827        VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
 828        VMSTATE_END_OF_LIST()
 829    }
 830};
 831
 832static const VMStateDescription vmstate_vapic = {
 833    .name = "kvm-tpr-opt",      /* compatible with qemu-kvm VAPIC */
 834    .version_id = 1,
 835    .minimum_version_id = 1,
 836    .post_load = vapic_post_load,
 837    .fields = (VMStateField[]) {
 838        VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
 839                       GuestROMState),
 840        VMSTATE_UINT32(state, VAPICROMState),
 841        VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
 842        VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
 843        VMSTATE_UINT32(vapic_paddr, VAPICROMState),
 844        VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
 845        VMSTATE_END_OF_LIST()
 846    }
 847};
 848
 849static void vapic_class_init(ObjectClass *klass, void *data)
 850{
 851    DeviceClass *dc = DEVICE_CLASS(klass);
 852
 853    dc->reset   = vapic_reset;
 854    dc->vmsd    = &vmstate_vapic;
 855    dc->realize = vapic_realize;
 856}
 857
 858static const TypeInfo vapic_type = {
 859    .name          = TYPE_VAPIC,
 860    .parent        = TYPE_SYS_BUS_DEVICE,
 861    .instance_size = sizeof(VAPICROMState),
 862    .class_init    = vapic_class_init,
 863};
 864
 865static void vapic_register(void)
 866{
 867    type_register_static(&vapic_type);
 868}
 869
 870type_init(vapic_register);
 871