qemu/hw/i386/kvmvapic.c
<<
>>
Prefs
   1/*
   2 * TPR optimization for 32-bit Windows guests (XP and Server 2003)
   3 *
   4 * Copyright (C) 2007-2008 Qumranet Technologies
   5 * Copyright (C) 2012      Jan Kiszka, Siemens AG
   6 *
   7 * This work is licensed under the terms of the GNU GPL version 2, or
   8 * (at your option) any later version. See the COPYING file in the
   9 * top-level directory.
  10 */
  11#include "qemu/osdep.h"
  12#include "sysemu/sysemu.h"
  13#include "sysemu/cpus.h"
  14#include "sysemu/kvm.h"
  15#include "hw/i386/apic_internal.h"
  16#include "hw/sysbus.h"
  17
  18#define VAPIC_IO_PORT           0x7e
  19
  20#define VAPIC_CPU_SHIFT         7
  21
  22#define ROM_BLOCK_SIZE          512
  23#define ROM_BLOCK_MASK          (~(ROM_BLOCK_SIZE - 1))
  24
  25typedef enum VAPICMode {
  26    VAPIC_INACTIVE = 0,
  27    VAPIC_ACTIVE   = 1,
  28    VAPIC_STANDBY  = 2,
  29} VAPICMode;
  30
  31typedef struct VAPICHandlers {
  32    uint32_t set_tpr;
  33    uint32_t set_tpr_eax;
  34    uint32_t get_tpr[8];
  35    uint32_t get_tpr_stack;
  36} QEMU_PACKED VAPICHandlers;
  37
  38typedef struct GuestROMState {
  39    char signature[8];
  40    uint32_t vaddr;
  41    uint32_t fixup_start;
  42    uint32_t fixup_end;
  43    uint32_t vapic_vaddr;
  44    uint32_t vapic_size;
  45    uint32_t vcpu_shift;
  46    uint32_t real_tpr_addr;
  47    VAPICHandlers up;
  48    VAPICHandlers mp;
  49} QEMU_PACKED GuestROMState;
  50
  51typedef struct VAPICROMState {
  52    SysBusDevice busdev;
  53    MemoryRegion io;
  54    MemoryRegion rom;
  55    uint32_t state;
  56    uint32_t rom_state_paddr;
  57    uint32_t rom_state_vaddr;
  58    uint32_t vapic_paddr;
  59    uint32_t real_tpr_addr;
  60    GuestROMState rom_state;
  61    size_t rom_size;
  62    bool rom_mapped_writable;
  63    VMChangeStateEntry *vmsentry;
  64} VAPICROMState;
  65
  66#define TYPE_VAPIC "kvmvapic"
  67#define VAPIC(obj) OBJECT_CHECK(VAPICROMState, (obj), TYPE_VAPIC)
  68
  69#define TPR_INSTR_ABS_MODRM             0x1
  70#define TPR_INSTR_MATCH_MODRM_REG       0x2
  71
  72typedef struct TPRInstruction {
  73    uint8_t opcode;
  74    uint8_t modrm_reg;
  75    unsigned int flags;
  76    TPRAccess access;
  77    size_t length;
  78    off_t addr_offset;
  79} TPRInstruction;
  80
  81/* must be sorted by length, shortest first */
  82static const TPRInstruction tpr_instr[] = {
  83    { /* mov abs to eax */
  84        .opcode = 0xa1,
  85        .access = TPR_ACCESS_READ,
  86        .length = 5,
  87        .addr_offset = 1,
  88    },
  89    { /* mov eax to abs */
  90        .opcode = 0xa3,
  91        .access = TPR_ACCESS_WRITE,
  92        .length = 5,
  93        .addr_offset = 1,
  94    },
  95    { /* mov r32 to r/m32 */
  96        .opcode = 0x89,
  97        .flags = TPR_INSTR_ABS_MODRM,
  98        .access = TPR_ACCESS_WRITE,
  99        .length = 6,
 100        .addr_offset = 2,
 101    },
 102    { /* mov r/m32 to r32 */
 103        .opcode = 0x8b,
 104        .flags = TPR_INSTR_ABS_MODRM,
 105        .access = TPR_ACCESS_READ,
 106        .length = 6,
 107        .addr_offset = 2,
 108    },
 109    { /* push r/m32 */
 110        .opcode = 0xff,
 111        .modrm_reg = 6,
 112        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
 113        .access = TPR_ACCESS_READ,
 114        .length = 6,
 115        .addr_offset = 2,
 116    },
 117    { /* mov imm32, r/m32 (c7/0) */
 118        .opcode = 0xc7,
 119        .modrm_reg = 0,
 120        .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
 121        .access = TPR_ACCESS_WRITE,
 122        .length = 10,
 123        .addr_offset = 2,
 124    },
 125};
 126
 127static void read_guest_rom_state(VAPICROMState *s)
 128{
 129    cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state,
 130                             sizeof(GuestROMState));
 131}
 132
 133static void write_guest_rom_state(VAPICROMState *s)
 134{
 135    cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state,
 136                              sizeof(GuestROMState));
 137}
 138
 139static void update_guest_rom_state(VAPICROMState *s)
 140{
 141    read_guest_rom_state(s);
 142
 143    s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr);
 144    s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT);
 145
 146    write_guest_rom_state(s);
 147}
 148
 149static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env)
 150{
 151    CPUState *cs = CPU(x86_env_get_cpu(env));
 152    hwaddr paddr;
 153    target_ulong addr;
 154
 155    if (s->state == VAPIC_ACTIVE) {
 156        return 0;
 157    }
 158    /*
 159     * If there is no prior TPR access instruction we could analyze (which is
 160     * the case after resume from hibernation), we need to scan the possible
 161     * virtual address space for the APIC mapping.
 162     */
 163    for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) {
 164        paddr = cpu_get_phys_page_debug(cs, addr);
 165        if (paddr != APIC_DEFAULT_ADDRESS) {
 166            continue;
 167        }
 168        s->real_tpr_addr = addr + 0x80;
 169        update_guest_rom_state(s);
 170        return 0;
 171    }
 172    return -1;
 173}
 174
 175static uint8_t modrm_reg(uint8_t modrm)
 176{
 177    return (modrm >> 3) & 7;
 178}
 179
 180static bool is_abs_modrm(uint8_t modrm)
 181{
 182    return (modrm & 0xc7) == 0x05;
 183}
 184
 185static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr)
 186{
 187    return opcode[0] == instr->opcode &&
 188        (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) &&
 189        (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) ||
 190         modrm_reg(opcode[1]) == instr->modrm_reg);
 191}
 192
 193static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu,
 194                                    target_ulong *pip, TPRAccess access)
 195{
 196    CPUState *cs = CPU(cpu);
 197    const TPRInstruction *instr;
 198    target_ulong ip = *pip;
 199    uint8_t opcode[2];
 200    uint32_t real_tpr_addr;
 201    int i;
 202
 203    if ((ip & 0xf0000000ULL) != 0x80000000ULL &&
 204        (ip & 0xf0000000ULL) != 0xe0000000ULL) {
 205        return -1;
 206    }
 207
 208    /*
 209     * Early Windows 2003 SMP initialization contains a
 210     *
 211     *   mov imm32, r/m32
 212     *
 213     * instruction that is patched by TPR optimization. The problem is that
 214     * RSP, used by the patched instruction, is zero, so the guest gets a
 215     * double fault and dies.
 216     */
 217    if (cpu->env.regs[R_ESP] == 0) {
 218        return -1;
 219    }
 220
 221    if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
 222        /*
 223         * KVM without kernel-based TPR access reporting will pass an IP that
 224         * points after the accessing instruction. So we need to look backward
 225         * to find the reason.
 226         */
 227        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
 228            instr = &tpr_instr[i];
 229            if (instr->access != access) {
 230                continue;
 231            }
 232            if (cpu_memory_rw_debug(cs, ip - instr->length, opcode,
 233                                    sizeof(opcode), 0) < 0) {
 234                return -1;
 235            }
 236            if (opcode_matches(opcode, instr)) {
 237                ip -= instr->length;
 238                goto instruction_ok;
 239            }
 240        }
 241        return -1;
 242    } else {
 243        if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) {
 244            return -1;
 245        }
 246        for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
 247            instr = &tpr_instr[i];
 248            if (opcode_matches(opcode, instr)) {
 249                goto instruction_ok;
 250            }
 251        }
 252        return -1;
 253    }
 254
 255instruction_ok:
 256    /*
 257     * Grab the virtual TPR address from the instruction
 258     * and update the cached values.
 259     */
 260    if (cpu_memory_rw_debug(cs, ip + instr->addr_offset,
 261                            (void *)&real_tpr_addr,
 262                            sizeof(real_tpr_addr), 0) < 0) {
 263        return -1;
 264    }
 265    real_tpr_addr = le32_to_cpu(real_tpr_addr);
 266    if ((real_tpr_addr & 0xfff) != 0x80) {
 267        return -1;
 268    }
 269    s->real_tpr_addr = real_tpr_addr;
 270    update_guest_rom_state(s);
 271
 272    *pip = ip;
 273    return 0;
 274}
 275
 276static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip)
 277{
 278    CPUState *cs = CPU(x86_env_get_cpu(env));
 279    hwaddr paddr;
 280    uint32_t rom_state_vaddr;
 281    uint32_t pos, patch, offset;
 282
 283    /* nothing to do if already activated */
 284    if (s->state == VAPIC_ACTIVE) {
 285        return 0;
 286    }
 287
 288    /* bail out if ROM init code was not executed (missing ROM?) */
 289    if (s->state == VAPIC_INACTIVE) {
 290        return -1;
 291    }
 292
 293    /* find out virtual address of the ROM */
 294    rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000);
 295    paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr);
 296    if (paddr == -1) {
 297        return -1;
 298    }
 299    paddr += rom_state_vaddr & ~TARGET_PAGE_MASK;
 300    if (paddr != s->rom_state_paddr) {
 301        return -1;
 302    }
 303    read_guest_rom_state(s);
 304    if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) {
 305        return -1;
 306    }
 307    s->rom_state_vaddr = rom_state_vaddr;
 308
 309    /* fixup addresses in ROM if needed */
 310    if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) {
 311        return 0;
 312    }
 313    for (pos = le32_to_cpu(s->rom_state.fixup_start);
 314         pos < le32_to_cpu(s->rom_state.fixup_end);
 315         pos += 4) {
 316        cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr,
 317                                 &offset, sizeof(offset));
 318        offset = le32_to_cpu(offset);
 319        cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch));
 320        patch = le32_to_cpu(patch);
 321        patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr);
 322        patch = cpu_to_le32(patch);
 323        cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch));
 324    }
 325    read_guest_rom_state(s);
 326    s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) -
 327        le32_to_cpu(s->rom_state.vaddr);
 328
 329    return 0;
 330}
 331
 332/*
 333 * Tries to read the unique processor number from the Kernel Processor Control
 334 * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR
 335 * cannot be accessed or is considered invalid. This also ensures that we are
 336 * not patching the wrong guest.
 337 */
 338static int get_kpcr_number(X86CPU *cpu)
 339{
 340    CPUX86State *env = &cpu->env;
 341    struct kpcr {
 342        uint8_t  fill1[0x1c];
 343        uint32_t self;
 344        uint8_t  fill2[0x31];
 345        uint8_t  number;
 346    } QEMU_PACKED kpcr;
 347
 348    if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base,
 349                            (void *)&kpcr, sizeof(kpcr), 0) < 0 ||
 350        kpcr.self != env->segs[R_FS].base) {
 351        return -1;
 352    }
 353    return kpcr.number;
 354}
 355
 356static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
 357{
 358    int cpu_number = get_kpcr_number(cpu);
 359    hwaddr vapic_paddr;
 360    static const uint8_t enabled = 1;
 361
 362    if (cpu_number < 0) {
 363        return -1;
 364    }
 365    vapic_paddr = s->vapic_paddr +
 366        (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
 367    cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled),
 368                              &enabled, sizeof(enabled));
 369    apic_enable_vapic(cpu->apic_state, vapic_paddr);
 370
 371    s->state = VAPIC_ACTIVE;
 372
 373    return 0;
 374}
 375
 376static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte)
 377{
 378    cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1);
 379}
 380
 381static void patch_call(VAPICROMState *s, X86CPU *cpu, target_ulong ip,
 382                       uint32_t target)
 383{
 384    uint32_t offset;
 385
 386    offset = cpu_to_le32(target - ip - 5);
 387    patch_byte(cpu, ip, 0xe8); /* call near */
 388    cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1);
 389}
 390
 391static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
 392{
 393    CPUState *cs = CPU(cpu);
 394    CPUX86State *env = &cpu->env;
 395    VAPICHandlers *handlers;
 396    uint8_t opcode[2];
 397    uint32_t imm32;
 398    target_ulong current_pc = 0;
 399    target_ulong current_cs_base = 0;
 400    int current_flags = 0;
 401
 402    if (smp_cpus == 1) {
 403        handlers = &s->rom_state.up;
 404    } else {
 405        handlers = &s->rom_state.mp;
 406    }
 407
 408    if (!kvm_enabled()) {
 409        cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
 410                             &current_flags);
 411    }
 412
 413    pause_all_vcpus();
 414
 415    cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0);
 416
 417    switch (opcode[0]) {
 418    case 0x89: /* mov r32 to r/m32 */
 419        patch_byte(cpu, ip, 0x50 + modrm_reg(opcode[1]));  /* push reg */
 420        patch_call(s, cpu, ip + 1, handlers->set_tpr);
 421        break;
 422    case 0x8b: /* mov r/m32 to r32 */
 423        patch_byte(cpu, ip, 0x90);
 424        patch_call(s, cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]);
 425        break;
 426    case 0xa1: /* mov abs to eax */
 427        patch_call(s, cpu, ip, handlers->get_tpr[0]);
 428        break;
 429    case 0xa3: /* mov eax to abs */
 430        patch_call(s, cpu, ip, handlers->set_tpr_eax);
 431        break;
 432    case 0xc7: /* mov imm32, r/m32 (c7/0) */
 433        patch_byte(cpu, ip, 0x68);  /* push imm32 */
 434        cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0);
 435        cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1);
 436        patch_call(s, cpu, ip + 5, handlers->set_tpr);
 437        break;
 438    case 0xff: /* push r/m32 */
 439        patch_byte(cpu, ip, 0x50); /* push eax */
 440        patch_call(s, cpu, ip + 1, handlers->get_tpr_stack);
 441        break;
 442    default:
 443        abort();
 444    }
 445
 446    resume_all_vcpus();
 447
 448    if (!kvm_enabled()) {
 449        cs->current_tb = NULL;
 450        tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
 451        cpu_resume_from_signal(cs, NULL);
 452    }
 453}
 454
 455void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip,
 456                             TPRAccess access)
 457{
 458    VAPICROMState *s = VAPIC(dev);
 459    X86CPU *cpu = X86_CPU(cs);
 460    CPUX86State *env = &cpu->env;
 461
 462    cpu_synchronize_state(cs);
 463
 464    if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) {
 465        if (s->state == VAPIC_ACTIVE) {
 466            vapic_enable(s, cpu);
 467        }
 468        return;
 469    }
 470    if (update_rom_mapping(s, env, ip) < 0) {
 471        return;
 472    }
 473    if (vapic_enable(s, cpu) < 0) {
 474        return;
 475    }
 476    patch_instruction(s, cpu, ip);
 477}
 478
 479typedef struct VAPICEnableTPRReporting {
 480    DeviceState *apic;
 481    bool enable;
 482} VAPICEnableTPRReporting;
 483
 484static void vapic_do_enable_tpr_reporting(void *data)
 485{
 486    VAPICEnableTPRReporting *info = data;
 487
 488    apic_enable_tpr_access_reporting(info->apic, info->enable);
 489}
 490
 491static void vapic_enable_tpr_reporting(bool enable)
 492{
 493    VAPICEnableTPRReporting info = {
 494        .enable = enable,
 495    };
 496    CPUState *cs;
 497    X86CPU *cpu;
 498
 499    CPU_FOREACH(cs) {
 500        cpu = X86_CPU(cs);
 501        info.apic = cpu->apic_state;
 502        run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info);
 503    }
 504}
 505
 506static void vapic_reset(DeviceState *dev)
 507{
 508    VAPICROMState *s = VAPIC(dev);
 509
 510    s->state = VAPIC_INACTIVE;
 511    s->rom_state_paddr = 0;
 512    vapic_enable_tpr_reporting(false);
 513}
 514
 515/*
 516 * Set the IRQ polling hypercalls to the supported variant:
 517 *  - vmcall if using KVM in-kernel irqchip
 518 *  - 32-bit VAPIC port write otherwise
 519 */
 520static int patch_hypercalls(VAPICROMState *s)
 521{
 522    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
 523    static const uint8_t vmcall_pattern[] = { /* vmcall */
 524        0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1
 525    };
 526    static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */
 527        0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e
 528    };
 529    uint8_t alternates[2];
 530    const uint8_t *pattern;
 531    const uint8_t *patch;
 532    int patches = 0;
 533    off_t pos;
 534    uint8_t *rom;
 535
 536    rom = g_malloc(s->rom_size);
 537    cpu_physical_memory_read(rom_paddr, rom, s->rom_size);
 538
 539    for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) {
 540        if (kvm_irqchip_in_kernel()) {
 541            pattern = outl_pattern;
 542            alternates[0] = outl_pattern[7];
 543            alternates[1] = outl_pattern[7];
 544            patch = &vmcall_pattern[5];
 545        } else {
 546            pattern = vmcall_pattern;
 547            alternates[0] = vmcall_pattern[7];
 548            alternates[1] = 0xd9; /* AMD's VMMCALL */
 549            patch = &outl_pattern[5];
 550        }
 551        if (memcmp(rom + pos, pattern, 7) == 0 &&
 552            (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) {
 553            cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3);
 554            /*
 555             * Don't flush the tb here. Under ordinary conditions, the patched
 556             * calls are miles away from the current IP. Under malicious
 557             * conditions, the guest could trick us to crash.
 558             */
 559        }
 560    }
 561
 562    g_free(rom);
 563
 564    if (patches != 0 && patches != 2) {
 565        return -1;
 566    }
 567
 568    return 0;
 569}
 570
 571/*
 572 * For TCG mode or the time KVM honors read-only memory regions, we need to
 573 * enable write access to the option ROM so that variables can be updated by
 574 * the guest.
 575 */
 576static int vapic_map_rom_writable(VAPICROMState *s)
 577{
 578    hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
 579    MemoryRegionSection section;
 580    MemoryRegion *as;
 581    size_t rom_size;
 582    uint8_t *ram;
 583
 584    as = sysbus_address_space(&s->busdev);
 585
 586    if (s->rom_mapped_writable) {
 587        memory_region_del_subregion(as, &s->rom);
 588        object_unparent(OBJECT(&s->rom));
 589    }
 590
 591    /* grab RAM memory region (region @rom_paddr may still be pc.rom) */
 592    section = memory_region_find(as, 0, 1);
 593
 594    /* read ROM size from RAM region */
 595    if (rom_paddr + 2 >= memory_region_size(section.mr)) {
 596        return -1;
 597    }
 598    ram = memory_region_get_ram_ptr(section.mr);
 599    rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE;
 600    if (rom_size == 0) {
 601        return -1;
 602    }
 603    s->rom_size = rom_size;
 604
 605    /* We need to round to avoid creating subpages
 606     * from which we cannot run code. */
 607    rom_size += rom_paddr & ~TARGET_PAGE_MASK;
 608    rom_paddr &= TARGET_PAGE_MASK;
 609    rom_size = TARGET_PAGE_ALIGN(rom_size);
 610
 611    memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr,
 612                             rom_paddr, rom_size);
 613    memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000);
 614    s->rom_mapped_writable = true;
 615    memory_region_unref(section.mr);
 616
 617    return 0;
 618}
 619
 620static int vapic_prepare(VAPICROMState *s)
 621{
 622    if (vapic_map_rom_writable(s) < 0) {
 623        return -1;
 624    }
 625
 626    if (patch_hypercalls(s) < 0) {
 627        return -1;
 628    }
 629
 630    vapic_enable_tpr_reporting(true);
 631
 632    return 0;
 633}
 634
 635static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
 636                        unsigned int size)
 637{
 638    VAPICROMState *s = opaque;
 639    X86CPU *cpu;
 640    CPUX86State *env;
 641    hwaddr rom_paddr;
 642
 643    if (!current_cpu) {
 644        return;
 645    }
 646
 647    cpu_synchronize_state(current_cpu);
 648    cpu = X86_CPU(current_cpu);
 649    env = &cpu->env;
 650
 651    /*
 652     * The VAPIC supports two PIO-based hypercalls, both via port 0x7E.
 653     *  o 16-bit write access:
 654     *    Reports the option ROM initialization to the hypervisor. Written
 655     *    value is the offset of the state structure in the ROM.
 656     *  o 8-bit write access:
 657     *    Reactivates the VAPIC after a guest hibernation, i.e. after the
 658     *    option ROM content has been re-initialized by a guest power cycle.
 659     *  o 32-bit write access:
 660     *    Poll for pending IRQs, considering the current VAPIC state.
 661     */
 662    switch (size) {
 663    case 2:
 664        if (s->state == VAPIC_INACTIVE) {
 665            rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK;
 666            s->rom_state_paddr = rom_paddr + data;
 667
 668            s->state = VAPIC_STANDBY;
 669        }
 670        if (vapic_prepare(s) < 0) {
 671            s->state = VAPIC_INACTIVE;
 672            s->rom_state_paddr = 0;
 673            break;
 674        }
 675        break;
 676    case 1:
 677        if (kvm_enabled()) {
 678            /*
 679             * Disable triggering instruction in ROM by writing a NOP.
 680             *
 681             * We cannot do this in TCG mode as the reported IP is not
 682             * accurate.
 683             */
 684            pause_all_vcpus();
 685            patch_byte(cpu, env->eip - 2, 0x66);
 686            patch_byte(cpu, env->eip - 1, 0x90);
 687            resume_all_vcpus();
 688        }
 689
 690        if (s->state == VAPIC_ACTIVE) {
 691            break;
 692        }
 693        if (update_rom_mapping(s, env, env->eip) < 0) {
 694            break;
 695        }
 696        if (find_real_tpr_addr(s, env) < 0) {
 697            break;
 698        }
 699        vapic_enable(s, cpu);
 700        break;
 701    default:
 702    case 4:
 703        if (!kvm_irqchip_in_kernel()) {
 704            apic_poll_irq(cpu->apic_state);
 705        }
 706        break;
 707    }
 708}
 709
 710static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
 711{
 712    return 0xffffffff;
 713}
 714
 715static const MemoryRegionOps vapic_ops = {
 716    .write = vapic_write,
 717    .read = vapic_read,
 718    .endianness = DEVICE_NATIVE_ENDIAN,
 719};
 720
 721static void vapic_realize(DeviceState *dev, Error **errp)
 722{
 723    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 724    VAPICROMState *s = VAPIC(dev);
 725
 726    memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2);
 727    sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io);
 728    sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2);
 729
 730    option_rom[nb_option_roms].name = "kvmvapic.bin";
 731    option_rom[nb_option_roms].bootindex = -1;
 732    nb_option_roms++;
 733}
 734
 735static void do_vapic_enable(void *data)
 736{
 737    VAPICROMState *s = data;
 738    X86CPU *cpu = X86_CPU(first_cpu);
 739
 740    static const uint8_t enabled = 1;
 741    cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
 742                              &enabled, sizeof(enabled));
 743    apic_enable_vapic(cpu->apic_state, s->vapic_paddr);
 744    s->state = VAPIC_ACTIVE;
 745}
 746
 747static void kvmvapic_vm_state_change(void *opaque, int running,
 748                                     RunState state)
 749{
 750    VAPICROMState *s = opaque;
 751    uint8_t *zero;
 752
 753    if (!running) {
 754        return;
 755    }
 756
 757    if (s->state == VAPIC_ACTIVE) {
 758        if (smp_cpus == 1) {
 759            run_on_cpu(first_cpu, do_vapic_enable, s);
 760        } else {
 761            zero = g_malloc0(s->rom_state.vapic_size);
 762            cpu_physical_memory_write(s->vapic_paddr, zero,
 763                                      s->rom_state.vapic_size);
 764            g_free(zero);
 765        }
 766    }
 767
 768    qemu_del_vm_change_state_handler(s->vmsentry);
 769}
 770
 771static int vapic_post_load(void *opaque, int version_id)
 772{
 773    VAPICROMState *s = opaque;
 774
 775    /*
 776     * The old implementation of qemu-kvm did not provide the state
 777     * VAPIC_STANDBY. Reconstruct it.
 778     */
 779    if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) {
 780        s->state = VAPIC_STANDBY;
 781    }
 782
 783    if (s->state != VAPIC_INACTIVE) {
 784        if (vapic_prepare(s) < 0) {
 785            return -1;
 786        }
 787    }
 788
 789    if (!s->vmsentry) {
 790        s->vmsentry =
 791            qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s);
 792    }
 793    return 0;
 794}
 795
 796static const VMStateDescription vmstate_handlers = {
 797    .name = "kvmvapic-handlers",
 798    .version_id = 1,
 799    .minimum_version_id = 1,
 800    .fields = (VMStateField[]) {
 801        VMSTATE_UINT32(set_tpr, VAPICHandlers),
 802        VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
 803        VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
 804        VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
 805        VMSTATE_END_OF_LIST()
 806    }
 807};
 808
 809static const VMStateDescription vmstate_guest_rom = {
 810    .name = "kvmvapic-guest-rom",
 811    .version_id = 1,
 812    .minimum_version_id = 1,
 813    .fields = (VMStateField[]) {
 814        VMSTATE_UNUSED(8),     /* signature */
 815        VMSTATE_UINT32(vaddr, GuestROMState),
 816        VMSTATE_UINT32(fixup_start, GuestROMState),
 817        VMSTATE_UINT32(fixup_end, GuestROMState),
 818        VMSTATE_UINT32(vapic_vaddr, GuestROMState),
 819        VMSTATE_UINT32(vapic_size, GuestROMState),
 820        VMSTATE_UINT32(vcpu_shift, GuestROMState),
 821        VMSTATE_UINT32(real_tpr_addr, GuestROMState),
 822        VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
 823        VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
 824        VMSTATE_END_OF_LIST()
 825    }
 826};
 827
 828static const VMStateDescription vmstate_vapic = {
 829    .name = "kvm-tpr-opt",      /* compatible with qemu-kvm VAPIC */
 830    .version_id = 1,
 831    .minimum_version_id = 1,
 832    .post_load = vapic_post_load,
 833    .fields = (VMStateField[]) {
 834        VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
 835                       GuestROMState),
 836        VMSTATE_UINT32(state, VAPICROMState),
 837        VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
 838        VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
 839        VMSTATE_UINT32(vapic_paddr, VAPICROMState),
 840        VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
 841        VMSTATE_END_OF_LIST()
 842    }
 843};
 844
 845static void vapic_class_init(ObjectClass *klass, void *data)
 846{
 847    DeviceClass *dc = DEVICE_CLASS(klass);
 848
 849    dc->reset   = vapic_reset;
 850    dc->vmsd    = &vmstate_vapic;
 851    dc->realize = vapic_realize;
 852}
 853
 854static const TypeInfo vapic_type = {
 855    .name          = TYPE_VAPIC,
 856    .parent        = TYPE_SYS_BUS_DEVICE,
 857    .instance_size = sizeof(VAPICROMState),
 858    .class_init    = vapic_class_init,
 859};
 860
 861static void vapic_register(void)
 862{
 863    type_register_static(&vapic_type);
 864}
 865
 866type_init(vapic_register);
 867