qemu/target/i386/hvf/hvf.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "sysemu/runstate.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

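/* Report a readable name for a Hypervisor.framework error code and abort; no-op on HV_SUCCESS. */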
static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
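/* Return the first slot overlapping [start, start + size), or NULL if none does. */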
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            (start + size) > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];

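/*
 * Mirror a QEMU hvf_slot into the Hypervisor.framework mapping table:
 * unmap a stale mapping of a different size, then (re)map the slot with
 * the requested protection flags. A zero-sized slot only unmaps.
 */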
static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
    struct mac_slot *macslot;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

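/*
 * Add or remove the guest-physical mapping that backs a MemoryRegionSection.
 * Overlapping slots are torn down first; read-only RAM and ROM devices in
 * romd mode are mapped without write permission so guest stores trap to QEMU.
 */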
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;
    bool writeable = !area->readonly && !area->rom_device;
    hv_memory_flags_t flags;

    if (!memory_region_is_ram(area)) {
        if (writeable) {
            return;
        } else if (!memory_region_is_romd(area)) {
            /*
             * If the memory device is not in romd_mode, then we actually want
             * to remove the hvf memory slot so all accesses will trap.
             */
            add = false;
        }
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region is already registered; nothing to do. */
        }
    }

    /* Region needs to be reset: set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem, 0)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    if (area->readonly ||
        (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
        flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
    } else {
        flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem, flags)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

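/*
 * Push the QEMU APIC's task priority into HV_X86_TPR and set the VMCS TPR
 * threshold from the highest-priority pending interrupt, if any.
 */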
void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: integrate APIC handling properly */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

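/*
 * Forward a port I/O access to the QEMU I/O address space, one element of
 * 'size' bytes at a time, 'count' times (count > 1 for string instructions).
 */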
void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

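/*
 * Decide whether an EPT violation needs instruction emulation (MMIO or a
 * read of a ROM device) rather than being a spurious or write-tracking
 * fault. Write faults on dirty-logged slots are resolved here by marking
 * the page dirty and restoring write access.
 */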
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    if (!slot) {
        return true;
    }
    if (!memory_region_is_ram(slot->region) &&
        !(read && memory_region_is_romd(slot->region))) {
        return true;
    }
    return false;
}

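/* Enable or disable write protection on a slot for dirty-page tracking. */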
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

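/* MemoryListener that keeps HVF slots and dirty tracking in sync with the QEMU memory map. */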
static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

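/*
 * Bring the vCPU into the architectural reset state: real mode, CS base
 * 0xffff0000 with RIP 0xfff0, and general-purpose registers cleared.
 */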
void hvf_reset_vcpu(CPUState *cpu)
{
    uint64_t pdpte[4] = {0, 0, 0, 0};
    int i;

    /*
     * TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);

    /* Initialize PDPTE */
    for (i = 0; i < 4; i++) {
        wvmcs(cpu->hvf_fd, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]);
    }

    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

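/*
 * Create and configure a Hypervisor.framework vCPU: set up signal handling
 * for the vCPU thread, read the VMX capabilities, program the VMCS control
 * fields, and let the guest access the syscall/TSC MSRs directly.
 */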
int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* Unblock SIG_IPI in this thread so vCPU kicks can be delivered to it. */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    pthread_sigmask(SIG_SETMASK, &set, NULL);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

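/*
 * Record pending exception/interrupt/NMI state from the IDT-vectoring
 * information of the last exit, so it can be re-injected on the next entry,
 * and refresh the interruptibility flags in env->hflags/hflags2.
 */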
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    env->ins_len = 0;
    env->has_error_code = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

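/*
 * Enter the guest repeatedly, handling exits inline where possible.
 * Returns an EXCP_* code once an exit needs the main loop, e.g. HLT with
 * no pending work or an exit that requires interrupt/I/O processing.
 */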
int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
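        /*
         * Dispatch on the VMX exit reason. Exits handled entirely here
         * leave ret == 0 so the loop re-enters the guest; anything that
         * needs the main loop sets ret to an EXCP_* value.
         */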
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, 1);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

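/* Accelerator init: create the VM, initialize the memory slots, and hook up the memory listener and interrupt handler. */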
static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);