qemu/hw/i386/kvmvapic.c
<<
>>
Prefs
   1/*
   2 * TPR optimization for 32-bit Windows guests (XP and Server 2003)
   3 *
   4 * Copyright (C) 2007-2008 Qumranet Technologies
   5 * Copyright (C) 2012      Jan Kiszka, Siemens AG
   6 *
   7 * This work is licensed under the terms of the GNU GPL version 2, or
   8 * (at your option) any later version. See the COPYING file in the
   9 * top-level directory.
  10 */
  11
#include "qemu/osdep.h"
#include "qemu/module.h"
#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "sysemu/cpus.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "hw/i386/apic_internal.h"
#include "hw/sysbus.h"
#include "hw/boards.h"
#include "migration/vmstate.h"
#include "qom/object.h"
  24
/* I/O port on which the guest option ROM issues its PIO hypercalls */
#define VAPIC_IO_PORT           0x7e

/*
 * log2 of the per-CPU stride inside the VAPIC area: the per-CPU slot is
 * located at vapic_paddr + (cpu_number << VAPIC_CPU_SHIFT).
 */
#define VAPIC_CPU_SHIFT         7

/*
 * The ROM size byte is counted in units of ROM_BLOCK_SIZE, and the ROM base
 * is derived by masking an address with ROM_BLOCK_MASK.
 */
#define ROM_BLOCK_SIZE          512
#define ROM_BLOCK_MASK          (~(ROM_BLOCK_SIZE - 1))
  31
/*
 * Activation state of the device, stored in VAPICROMState.state.
 */
typedef enum VAPICMode {
    /* reset state; the ROM init code has not reported itself */
    VAPIC_INACTIVE = 0,
    /* set by vapic_enable()/do_vapic_enable() once a VAPIC page is in use */
    VAPIC_ACTIVE   = 1,
    /* ROM init was reported via the 16-bit port write, not yet enabled */
    VAPIC_STANDBY  = 2,
} VAPICMode;
  37
/*
 * Table of handler addresses inside the guest ROM. do_patch_instruction()
 * uses these values as targets of the near calls it writes over the
 * original TPR access instructions. The structure is packed because it is
 * part of GuestROMState, which is copied to and from guest memory verbatim.
 */
typedef struct VAPICHandlers {
    uint32_t set_tpr;        /* used for "mov r32/imm32 to TPR" patches */
    uint32_t set_tpr_eax;    /* used for "mov eax to abs" patches */
    uint32_t get_tpr[8];     /* indexed by the ModRM reg field (0 for eax) */
    uint32_t get_tpr_stack;  /* used for "push r/m32" patches */
} QEMU_PACKED VAPICHandlers;
  44
/*
 * Host-side image of the state structure embedded in the guest option ROM.
 * It is transferred with cpu_physical_memory_read/write at rom_state_paddr,
 * so the layout must stay packed and in this order. Most accesses in this
 * file convert the 32-bit fields with le32_to_cpu()/cpu_to_le32().
 */
typedef struct GuestROMState {
    char signature[8];        /* compared against "kvm aPiC" */
    uint32_t vaddr;           /* virtual address the ROM is linked for */
    uint32_t fixup_start;     /* start of the relocation table (ROM vaddr) */
    uint32_t fixup_end;       /* end of the relocation table (exclusive) */
    uint32_t vapic_vaddr;     /* virtual address of the VAPIC area */
    uint32_t vapic_size;      /* size of the VAPIC area in bytes */
    uint32_t vcpu_shift;      /* written by the host: VAPIC_CPU_SHIFT */
    uint32_t real_tpr_addr;   /* written by the host: virtual TPR address */
    VAPICHandlers up;         /* handlers selected when smp.cpus == 1 */
    VAPICHandlers mp;         /* handlers selected otherwise */
} QEMU_PACKED GuestROMState;
  57
/*
 * Device instance state of the "kvmvapic" sysbus device.
 */
struct VAPICROMState {
    SysBusDevice busdev;         /* parent object */
    MemoryRegion io;             /* hypercall I/O port region */
    MemoryRegion rom;            /* alias overlaid on the option ROM range */
    uint32_t state;              /* a VAPICMode value */
    uint32_t rom_state_paddr;    /* guest-physical address of GuestROMState */
    uint32_t rom_state_vaddr;    /* guest-virtual address of GuestROMState */
    uint32_t vapic_paddr;        /* guest-physical base of the VAPIC area */
    uint32_t real_tpr_addr;      /* guest-virtual address of the APIC TPR */
    GuestROMState rom_state;     /* host copy of the ROM state structure */
    size_t rom_size;             /* ROM size in bytes, from the ROM header */
    bool rom_mapped_writable;    /* true while the @rom alias is installed */
    VMChangeStateEntry *vmsentry; /* handler registered by vapic_post_load() */
};
  72
/* QOM type name of the device; also provides the VAPIC() cast used below */
#define TYPE_VAPIC "kvmvapic"
OBJECT_DECLARE_SIMPLE_TYPE(VAPICROMState, VAPIC)
  75
/* TPRInstruction.flags: the ModRM byte must satisfy is_abs_modrm() */
#define TPR_INSTR_ABS_MODRM             0x1
/* TPRInstruction.flags: the ModRM reg field must equal .modrm_reg */
#define TPR_INSTR_MATCH_MODRM_REG       0x2
  78
/*
 * Description of one instruction form that may access the TPR and that
 * this device knows how to recognize and patch.
 */
typedef struct TPRInstruction {
    uint8_t opcode;       /* first instruction byte */
    uint8_t modrm_reg;    /* required ModRM reg value (see flags) */
    unsigned int flags;   /* TPR_INSTR_* bits */
    TPRAccess access;     /* whether the instruction reads or writes */
    size_t length;        /* total instruction length in bytes */
    off_t addr_offset;    /* offset of the 32-bit address operand */
} TPRInstruction;
  87
/*
 * Instruction forms recognized by evaluate_tpr_instruction().
 *
 * must be sorted by length, shortest first
 */
static const TPRInstruction tpr_instr[] = {
    { /* mov abs to eax */
        .opcode = 0xa1,
        .access = TPR_ACCESS_READ,
        .length = 5,
        .addr_offset = 1,
    },
    { /* mov eax to abs */
        .opcode = 0xa3,
        .access = TPR_ACCESS_WRITE,
        .length = 5,
        .addr_offset = 1,
    },
    { /* mov r32 to r/m32 */
        .opcode = 0x89,
        .flags = TPR_INSTR_ABS_MODRM,
        .access = TPR_ACCESS_WRITE,
        .length = 6,
        .addr_offset = 2,
    },
    { /* mov r/m32 to r32 */
        .opcode = 0x8b,
        .flags = TPR_INSTR_ABS_MODRM,
        .access = TPR_ACCESS_READ,
        .length = 6,
        .addr_offset = 2,
    },
    { /* push r/m32 */
        .opcode = 0xff,
        .modrm_reg = 6,
        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
        .access = TPR_ACCESS_READ,
        .length = 6,
        .addr_offset = 2,
    },
    { /* mov imm32, r/m32 (c7/0) */
        .opcode = 0xc7,
        .modrm_reg = 0,
        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
        .access = TPR_ACCESS_WRITE,
        .length = 10,
        .addr_offset = 2,
    },
};
 133
 134static void read_guest_rom_state(VAPICROMState *s)
 135{
 136    cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state,
 137                             sizeof(GuestROMState));
 138}
 139
 140static void write_guest_rom_state(VAPICROMState *s)
 141{
 142    cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state,
 143                              sizeof(GuestROMState));
 144}
 145
 146static void update_guest_rom_state(VAPICROMState *s)
 147{
 148    read_guest_rom_state(s);
 149
 150    s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr);
 151    s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT);
 152
 153    write_guest_rom_state(s);
 154}
 155
 156static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env)
 157{
 158    CPUState *cs = env_cpu(env);
 159    hwaddr paddr;
 160    target_ulong addr;
 161
 162    if (s->state == VAPIC_ACTIVE) {
 163        return 0;
 164    }
 165    /*
 166     * If there is no prior TPR access instruction we could analyze (which is
 167     * the case after resume from hibernation), we need to scan the possible
 168     * virtual address space for the APIC mapping.
 169     */
 170    for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) {
 171        paddr = cpu_get_phys_page_debug(cs, addr);
 172        if (paddr != APIC_DEFAULT_ADDRESS) {
 173            continue;
 174        }
 175        s->real_tpr_addr = addr + 0x80;
 176        update_guest_rom_state(s);
 177        return 0;
 178    }
 179    return -1;
 180}
 181
 182static uint8_t modrm_reg(uint8_t modrm)
 183{
 184    return (modrm >> 3) & 7;
 185}
 186
 187static bool is_abs_modrm(uint8_t modrm)
 188{
 189    return (modrm & 0xc7) == 0x05;
 190}
 191
 192static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr)
 193{
 194    return opcode[0] == instr->opcode &&
 195        (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) &&
 196        (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) ||
 197         modrm_reg(opcode[1]) == instr->modrm_reg);
 198}
 199
 200static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu,
 201                                    target_ulong *pip, TPRAccess access)
 202{
 203    CPUState *cs = CPU(cpu);
 204    const TPRInstruction *instr;
 205    target_ulong ip = *pip;
 206    uint8_t opcode[2];
 207    uint32_t real_tpr_addr;
 208    int i;
 209
 210    if ((ip & 0xf0000000ULL) != 0x80000000ULL &&
 211        (ip & 0xf0000000ULL) != 0xe0000000ULL) {
 212        return -1;
 213    }
 214
 215    /*
 216     * Early Windows 2003 SMP initialization contains a
 217     *
 218     *   mov imm32, r/m32
 219     *
 220     * instruction that is patched by TPR optimization. The problem is that
 221     * RSP, used by the patched instruction, is zero, so the guest gets a
 222     * double fault and dies.
 223     */
 224    if (cpu->env.regs[R_ESP] == 0) {
 225        return -1;
 226    }
 227
 228    if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
 229        /*
 230         * KVM without kernel-based TPR access reporting will pass an IP that
 231         * points after the accessing instruction. So we need to look backward
 232         * to find the reason.
 233         */
 234        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
 235            instr = &tpr_instr[i];
 236            if (instr->access != access) {
 237                continue;
 238            }
 239            if (cpu_memory_rw_debug(cs, ip - instr->length, opcode,
 240                                    sizeof(opcode), 0) < 0) {
 241                return -1;
 242            }
 243            if (opcode_matches(opcode, instr)) {
 244                ip -= instr->length;
 245                goto instruction_ok;
 246            }
 247        }
 248        return -1;
 249    } else {
 250        if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) {
 251            return -1;
 252        }
 253        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
 254            instr = &tpr_instr[i];
 255            if (opcode_matches(opcode, instr)) {
 256                goto instruction_ok;
 257            }
 258        }
 259        return -1;
 260    }
 261
 262instruction_ok:
 263    /*
 264     * Grab the virtual TPR address from the instruction
 265     * and update the cached values.
 266     */
 267    if (cpu_memory_rw_debug(cs, ip + instr->addr_offset,
 268                            (void *)&real_tpr_addr,
 269                            sizeof(real_tpr_addr), 0) < 0) {
 270        return -1;
 271    }
 272    real_tpr_addr = le32_to_cpu(real_tpr_addr);
 273    if ((real_tpr_addr & 0xfff) != 0x80) {
 274        return -1;
 275    }
 276    s->real_tpr_addr = real_tpr_addr;
 277    update_guest_rom_state(s);
 278
 279    *pip = ip;
 280    return 0;
 281}
 282
 283static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip)
 284{
 285    CPUState *cs = env_cpu(env);
 286    hwaddr paddr;
 287    uint32_t rom_state_vaddr;
 288    uint32_t pos, patch, offset;
 289
 290    /* nothing to do if already activated */
 291    if (s->state == VAPIC_ACTIVE) {
 292        return 0;
 293    }
 294
 295    /* bail out if ROM init code was not executed (missing ROM?) */
 296    if (s->state == VAPIC_INACTIVE) {
 297        return -1;
 298    }
 299
 300    /* find out virtual address of the ROM */
 301    rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000);
 302    paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr);
 303    if (paddr == -1) {
 304        return -1;
 305    }
 306    paddr += rom_state_vaddr & ~TARGET_PAGE_MASK;
 307    if (paddr != s->rom_state_paddr) {
 308        return -1;
 309    }
 310    read_guest_rom_state(s);
 311    if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) {
 312        return -1;
 313    }
 314    s->rom_state_vaddr = rom_state_vaddr;
 315
 316    /* fixup addresses in ROM if needed */
 317    if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) {
 318        return 0;
 319    }
 320    for (pos = le32_to_cpu(s->rom_state.fixup_start);
 321         pos < le32_to_cpu(s->rom_state.fixup_end);
 322         pos += 4) {
 323        cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr,
 324                                 &offset, sizeof(offset));
 325        offset = le32_to_cpu(offset);
 326        cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch));
 327        patch = le32_to_cpu(patch);
 328        patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr);
 329        patch = cpu_to_le32(patch);
 330        cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch));
 331    }
 332    read_guest_rom_state(s);
 333    s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) -
 334        le32_to_cpu(s->rom_state.vaddr);
 335
 336    return 0;
 337}
 338
 339/*
 340 * Tries to read the unique processor number from the Kernel Processor Control
 341 * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR
 342 * cannot be accessed or is considered invalid. This also ensures that we are
 343 * not patching the wrong guest.
 344 */
 345static int get_kpcr_number(X86CPU *cpu)
 346{
 347    CPUX86State *env = &cpu->env;
 348    struct kpcr {
 349        uint8_t  fill1[0x1c];
 350        uint32_t self;
 351        uint8_t  fill2[0x31];
 352        uint8_t  number;
 353    } QEMU_PACKED kpcr;
 354
 355    if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base,
 356                            (void *)&kpcr, sizeof(kpcr), 0) < 0 ||
 357        kpcr.self != env->segs[R_FS].base) {
 358        return -1;
 359    }
 360    return kpcr.number;
 361}
 362
 363static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
 364{
 365    int cpu_number = get_kpcr_number(cpu);
 366    hwaddr vapic_paddr;
 367    static const uint8_t enabled = 1;
 368
 369    if (cpu_number < 0) {
 370        return -1;
 371    }
 372    vapic_paddr = s->vapic_paddr +
 373        (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
 374    cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled),
 375                              &enabled, sizeof(enabled));
 376    apic_enable_vapic(cpu->apic_state, vapic_paddr);
 377
 378    s->state = VAPIC_ACTIVE;
 379
 380    return 0;
 381}
 382
 383static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte)
 384{
 385    cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1);
 386}
 387
 388static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target)
 389{
 390    uint32_t offset;
 391
 392    offset = cpu_to_le32(target - ip - 5);
 393    patch_byte(cpu, ip, 0xe8); /* call near */
 394    cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1);
 395}
 396
/*
 * Work item passed from patch_instruction() to do_patch_instruction().
 * It is heap-allocated by the former and freed by the latter.
 */
typedef struct PatchInfo {
    VAPICHandlers *handler;   /* handler table to take call targets from */
    target_ulong ip;          /* guest address of the instruction to patch */
} PatchInfo;
 401
 402static void do_patch_instruction(CPUState *cs, run_on_cpu_data data)
 403{
 404    X86CPU *x86_cpu = X86_CPU(cs);
 405    PatchInfo *info = (PatchInfo *) data.host_ptr;
 406    VAPICHandlers *handlers = info->handler;
 407    target_ulong ip = info->ip;
 408    uint8_t opcode[2];
 409    uint32_t imm32 = 0;
 410
 411    cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0);
 412
 413    switch (opcode[0]) {
 414    case 0x89: /* mov r32 to r/m32 */
 415        patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1]));  /* push reg */
 416        patch_call(x86_cpu, ip + 1, handlers->set_tpr);
 417        break;
 418    case 0x8b: /* mov r/m32 to r32 */
 419        patch_byte(x86_cpu, ip, 0x90);
 420        patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]);
 421        break;
 422    case 0xa1: /* mov abs to eax */
 423        patch_call(x86_cpu, ip, handlers->get_tpr[0]);
 424        break;
 425    case 0xa3: /* mov eax to abs */
 426        patch_call(x86_cpu, ip, handlers->set_tpr_eax);
 427        break;
 428    case 0xc7: /* mov imm32, r/m32 (c7/0) */
 429        patch_byte(x86_cpu, ip, 0x68);  /* push imm32 */
 430        cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0);
 431        cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1);
 432        patch_call(x86_cpu, ip + 5, handlers->set_tpr);
 433        break;
 434    case 0xff: /* push r/m32 */
 435        patch_byte(x86_cpu, ip, 0x50); /* push eax */
 436        patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack);
 437        break;
 438    default:
 439        abort();
 440    }
 441
 442    g_free(info);
 443}
 444
 445static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
 446{
 447    MachineState *ms = MACHINE(qdev_get_machine());
 448    CPUState *cs = CPU(cpu);
 449    VAPICHandlers *handlers;
 450    PatchInfo *info;
 451
 452    if (ms->smp.cpus == 1) {
 453        handlers = &s->rom_state.up;
 454    } else {
 455        handlers = &s->rom_state.mp;
 456    }
 457
 458    info  = g_new(PatchInfo, 1);
 459    info->handler = handlers;
 460    info->ip = ip;
 461
 462    async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info));
 463}
 464
 465void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip,
 466                             TPRAccess access)
 467{
 468    VAPICROMState *s = VAPIC(dev);
 469    X86CPU *cpu = X86_CPU(cs);
 470    CPUX86State *env = &cpu->env;
 471
 472    cpu_synchronize_state(cs);
 473
 474    if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) {
 475        if (s->state == VAPIC_ACTIVE) {
 476            vapic_enable(s, cpu);
 477        }
 478        return;
 479    }
 480    if (update_rom_mapping(s, env, ip) < 0) {
 481        return;
 482    }
 483    if (vapic_enable(s, cpu) < 0) {
 484        return;
 485    }
 486    patch_instruction(s, cpu, ip);
 487}
 488
/* Argument bundle handed to vapic_do_enable_tpr_reporting() */
typedef struct VAPICEnableTPRReporting {
    DeviceState *apic;   /* APIC device of the CPU being configured */
    bool enable;         /* requested TPR access reporting state */
} VAPICEnableTPRReporting;
 493
 494static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
 495{
 496    VAPICEnableTPRReporting *info = data.host_ptr;
 497    apic_enable_tpr_access_reporting(info->apic, info->enable);
 498}
 499
 500static void vapic_enable_tpr_reporting(bool enable)
 501{
 502    VAPICEnableTPRReporting info = {
 503        .enable = enable,
 504    };
 505    CPUState *cs;
 506    X86CPU *cpu;
 507
 508    CPU_FOREACH(cs) {
 509        cpu = X86_CPU(cs);
 510        info.apic = cpu->apic_state;
 511        run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
 512    }
 513}
 514
 515static void vapic_reset(DeviceState *dev)
 516{
 517    VAPICROMState *s = VAPIC(dev);
 518
 519    s->state = VAPIC_INACTIVE;
 520    s->rom_state_paddr = 0;
 521    vapic_enable_tpr_reporting(false);
 522}
 523
 524/*
 525 * Set the IRQ polling hypercalls to the supported variant:
 526 *  - vmcall if using KVM in-kernel irqchip
 527 *  - 32-bit VAPIC port write otherwise
 528 */
 529static int patch_hypercalls(VAPICROMState *s)
 530{
 531    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
 532    static const uint8_t vmcall_pattern[] = { /* vmcall */
 533        0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1
 534    };
 535    static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */
 536        0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e
 537    };
 538    uint8_t alternates[2];
 539    const uint8_t *pattern;
 540    const uint8_t *patch;
 541    off_t pos;
 542    uint8_t *rom;
 543
 544    rom = g_malloc(s->rom_size);
 545    cpu_physical_memory_read(rom_paddr, rom, s->rom_size);
 546
 547    for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) {
 548        if (kvm_irqchip_in_kernel()) {
 549            pattern = outl_pattern;
 550            alternates[0] = outl_pattern[7];
 551            alternates[1] = outl_pattern[7];
 552            patch = &vmcall_pattern[5];
 553        } else {
 554            pattern = vmcall_pattern;
 555            alternates[0] = vmcall_pattern[7];
 556            alternates[1] = 0xd9; /* AMD's VMMCALL */
 557            patch = &outl_pattern[5];
 558        }
 559        if (memcmp(rom + pos, pattern, 7) == 0 &&
 560            (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) {
 561            cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3);
 562            /*
 563             * Don't flush the tb here. Under ordinary conditions, the patched
 564             * calls are miles away from the current IP. Under malicious
 565             * conditions, the guest could trick us to crash.
 566             */
 567        }
 568    }
 569
 570    g_free(rom);
 571    return 0;
 572}
 573
 574/*
 575 * For TCG mode or the time KVM honors read-only memory regions, we need to
 576 * enable write access to the option ROM so that variables can be updated by
 577 * the guest.
 578 */
 579static int vapic_map_rom_writable(VAPICROMState *s)
 580{
 581    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
 582    MemoryRegionSection section;
 583    MemoryRegion *as;
 584    size_t rom_size;
 585    uint8_t *ram;
 586
 587    as = sysbus_address_space(&s->busdev);
 588
 589    if (s->rom_mapped_writable) {
 590        memory_region_del_subregion(as, &s->rom);
 591        object_unparent(OBJECT(&s->rom));
 592    }
 593
 594    /* grab RAM memory region (region @rom_paddr may still be pc.rom) */
 595    section = memory_region_find(as, 0, 1);
 596
 597    /* read ROM size from RAM region */
 598    if (rom_paddr + 2 >= memory_region_size(section.mr)) {
 599        return -1;
 600    }
 601    ram = memory_region_get_ram_ptr(section.mr);
 602    rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE;
 603    if (rom_size == 0) {
 604        return -1;
 605    }
 606    s->rom_size = rom_size;
 607
 608    /* We need to round to avoid creating subpages
 609     * from which we cannot run code. */
 610    rom_size += rom_paddr & ~TARGET_PAGE_MASK;
 611    rom_paddr &= TARGET_PAGE_MASK;
 612    rom_size = TARGET_PAGE_ALIGN(rom_size);
 613
 614    memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr,
 615                             rom_paddr, rom_size);
 616    memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000);
 617    s->rom_mapped_writable = true;
 618    memory_region_unref(section.mr);
 619
 620    return 0;
 621}
 622
 623static int vapic_prepare(VAPICROMState *s)
 624{
 625    if (vapic_map_rom_writable(s) < 0) {
 626        return -1;
 627    }
 628
 629    if (patch_hypercalls(s) < 0) {
 630        return -1;
 631    }
 632
 633    vapic_enable_tpr_reporting(true);
 634
 635    return 0;
 636}
 637
 638static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
 639                        unsigned int size)
 640{
 641    VAPICROMState *s = opaque;
 642    X86CPU *cpu;
 643    CPUX86State *env;
 644    hwaddr rom_paddr;
 645
 646    if (!current_cpu) {
 647        return;
 648    }
 649
 650    cpu_synchronize_state(current_cpu);
 651    cpu = X86_CPU(current_cpu);
 652    env = &cpu->env;
 653
 654    /*
 655     * The VAPIC supports two PIO-based hypercalls, both via port 0x7E.
 656     *  o 16-bit write access:
 657     *    Reports the option ROM initialization to the hypervisor. Written
 658     *    value is the offset of the state structure in the ROM.
 659     *  o 8-bit write access:
 660     *    Reactivates the VAPIC after a guest hibernation, i.e. after the
 661     *    option ROM content has been re-initialized by a guest power cycle.
 662     *  o 32-bit write access:
 663     *    Poll for pending IRQs, considering the current VAPIC state.
 664     */
 665    switch (size) {
 666    case 2:
 667        if (s->state == VAPIC_INACTIVE) {
 668            rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK;
 669            s->rom_state_paddr = rom_paddr + data;
 670
 671            s->state = VAPIC_STANDBY;
 672        }
 673        if (vapic_prepare(s) < 0) {
 674            s->state = VAPIC_INACTIVE;
 675            s->rom_state_paddr = 0;
 676            break;
 677        }
 678        break;
 679    case 1:
 680        if (kvm_enabled()) {
 681            /*
 682             * Disable triggering instruction in ROM by writing a NOP.
 683             *
 684             * We cannot do this in TCG mode as the reported IP is not
 685             * accurate.
 686             */
 687            pause_all_vcpus();
 688            patch_byte(cpu, env->eip - 2, 0x66);
 689            patch_byte(cpu, env->eip - 1, 0x90);
 690            resume_all_vcpus();
 691        }
 692
 693        if (s->state == VAPIC_ACTIVE) {
 694            break;
 695        }
 696        if (update_rom_mapping(s, env, env->eip) < 0) {
 697            break;
 698        }
 699        if (find_real_tpr_addr(s, env) < 0) {
 700            break;
 701        }
 702        vapic_enable(s, cpu);
 703        break;
 704    default:
 705    case 4:
 706        if (!kvm_irqchip_in_kernel()) {
 707            apic_poll_irq(cpu->apic_state);
 708        }
 709        break;
 710    }
 711}
 712
 713static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
 714{
 715    return 0xffffffff;
 716}
 717
/* Access callbacks of the hypercall I/O region created in vapic_realize() */
static const MemoryRegionOps vapic_ops = {
    .write = vapic_write,
    .read = vapic_read,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
 723
 724static void vapic_realize(DeviceState *dev, Error **errp)
 725{
 726    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 727    VAPICROMState *s = VAPIC(dev);
 728
 729    memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2);
 730    sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io);
 731    sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2);
 732
 733    option_rom[nb_option_roms].name = "kvmvapic.bin";
 734    option_rom[nb_option_roms].bootindex = -1;
 735    nb_option_roms++;
 736}
 737
 738static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
 739{
 740    VAPICROMState *s = data.host_ptr;
 741    X86CPU *cpu = X86_CPU(cs);
 742
 743    static const uint8_t enabled = 1;
 744    cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
 745                              &enabled, sizeof(enabled));
 746    apic_enable_vapic(cpu->apic_state, s->vapic_paddr);
 747    s->state = VAPIC_ACTIVE;
 748}
 749
 750static void kvmvapic_vm_state_change(void *opaque, bool running,
 751                                     RunState state)
 752{
 753    MachineState *ms = MACHINE(qdev_get_machine());
 754    VAPICROMState *s = opaque;
 755    uint8_t *zero;
 756
 757    if (!running) {
 758        return;
 759    }
 760
 761    if (s->state == VAPIC_ACTIVE) {
 762        if (ms->smp.cpus == 1) {
 763            run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
 764        } else {
 765            zero = g_malloc0(s->rom_state.vapic_size);
 766            cpu_physical_memory_write(s->vapic_paddr, zero,
 767                                      s->rom_state.vapic_size);
 768            g_free(zero);
 769        }
 770    }
 771
 772    qemu_del_vm_change_state_handler(s->vmsentry);
 773    s->vmsentry = NULL;
 774}
 775
 776static int vapic_post_load(void *opaque, int version_id)
 777{
 778    VAPICROMState *s = opaque;
 779
 780    /*
 781     * The old implementation of qemu-kvm did not provide the state
 782     * VAPIC_STANDBY. Reconstruct it.
 783     */
 784    if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) {
 785        s->state = VAPIC_STANDBY;
 786    }
 787
 788    if (s->state != VAPIC_INACTIVE) {
 789        if (vapic_prepare(s) < 0) {
 790            return -1;
 791        }
 792    }
 793
 794    if (!s->vmsentry) {
 795        s->vmsentry =
 796            qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s);
 797    }
 798    return 0;
 799}
 800
/* Migration description of one VAPICHandlers table (all fields, in order) */
static const VMStateDescription vmstate_handlers = {
    .name = "kvmvapic-handlers",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(set_tpr, VAPICHandlers),
        VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
        VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
        VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
        VMSTATE_END_OF_LIST()
    }
};
 813
/*
 * Migration description of GuestROMState. The 8-byte signature is not
 * transferred as a field; its place in the stream is covered by
 * VMSTATE_UNUSED.
 */
static const VMStateDescription vmstate_guest_rom = {
    .name = "kvmvapic-guest-rom",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UNUSED(8),     /* signature */
        VMSTATE_UINT32(vaddr, GuestROMState),
        VMSTATE_UINT32(fixup_start, GuestROMState),
        VMSTATE_UINT32(fixup_end, GuestROMState),
        VMSTATE_UINT32(vapic_vaddr, GuestROMState),
        VMSTATE_UINT32(vapic_size, GuestROMState),
        VMSTATE_UINT32(vcpu_shift, GuestROMState),
        VMSTATE_UINT32(real_tpr_addr, GuestROMState),
        VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
        VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
        VMSTATE_END_OF_LIST()
    }
};
 832
/*
 * Top-level migration description of the device. vapic_post_load() runs
 * after the fields below have been loaded. rom_size, rom_mapped_writable
 * and vmsentry are not part of the stream.
 */
static const VMStateDescription vmstate_vapic = {
    .name = "kvm-tpr-opt",      /* compatible with qemu-kvm VAPIC */
    .version_id = 1,
    .minimum_version_id = 1,
    .post_load = vapic_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
                       GuestROMState),
        VMSTATE_UINT32(state, VAPICROMState),
        VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
        VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
        VMSTATE_UINT32(vapic_paddr, VAPICROMState),
        VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
        VMSTATE_END_OF_LIST()
    }
};
 849
 850static void vapic_class_init(ObjectClass *klass, void *data)
 851{
 852    DeviceClass *dc = DEVICE_CLASS(klass);
 853
 854    dc->reset   = vapic_reset;
 855    dc->vmsd    = &vmstate_vapic;
 856    dc->realize = vapic_realize;
 857}
 858
/* QOM type registration record for the "kvmvapic" sysbus device */
static const TypeInfo vapic_type = {
    .name          = TYPE_VAPIC,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(VAPICROMState),
    .class_init    = vapic_class_init,
};
 865
/* Register the device type; invoked through the type_init() hook below. */
static void vapic_register(void)
{
    type_register_static(&vapic_type);
}

type_init(vapic_register);
 872