qemu/target/i386/hvf/hvf.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
  48#include "qemu/osdep.h"
  49#include "qemu-common.h"
  50#include "qemu/error-report.h"
  51
  52#include "sysemu/hvf.h"
  53#include "hvf-i386.h"
  54#include "vmcs.h"
  55#include "vmx.h"
  56#include "x86.h"
  57#include "x86_descr.h"
  58#include "x86_mmu.h"
  59#include "x86_decode.h"
  60#include "x86_emu.h"
  61#include "x86_task.h"
  62#include "x86hvf.h"
  63
  64#include <Hypervisor/hv.h>
  65#include <Hypervisor/hv_vmx.h>
  66
  67#include "exec/address-spaces.h"
  68#include "exec/exec-all.h"
  69#include "exec/ioport.h"
  70#include "hw/i386/apic_internal.h"
  71#include "hw/boards.h"
  72#include "qemu/main-loop.h"
  73#include "sysemu/accel.h"
  74#include "sysemu/sysemu.h"
  75#include "target/i386/cpu.h"
  76
pthread_rwlock_t mem_lock = PTHREAD_RWLOCK_INITIALIZER;
HVFState *hvf_state;
int hvf_disabled = 1;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
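/*
 * Return the slot whose guest-physical range overlaps [start, end),
 * or NULL if no populated slot matches.
 */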
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

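/*
 * Mirror one hvf_slot into Hypervisor.framework: drop a stale mapping
 * whose size has changed, then hv_vm_map() the slot's backing memory
 * (if any) with read/write/execute permissions.
 */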
static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

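/*
 * MemoryListener back end: register (add == true) or unregister the RAM
 * behind a MemoryRegionSection. Any overlapping slot is torn down first;
 * on add, a free slot is claimed and mapped into the guest.
 */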
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region is already registered; nothing to do. */
        }
    }

    /* Region needs to be reset: set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                   int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

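/*
 * Classify an EPT violation. Write faults that hit a dirty-logged slot are
 * handled here by marking the page dirty and re-opening the slot for writes.
 * Returns true when a valid data access missed every RAM slot and therefore
 * has to be emulated as MMIO.
 */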
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

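/*
 * Enable or disable dirty-page tracking for the slot backing a section by
 * write-protecting (or restoring write access to) its EPT mapping.
 */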
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

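/*
 * Put the vCPU into the architectural power-on state: real mode with the
 * usual reset segment bases and limits, RIP at the reset vector, and the
 * general-purpose registers cleared (RDX holds the processor signature).
 */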
void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    cpu->halted = 0;
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

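/*
 * Create and configure a Hypervisor.framework vCPU: set up SIG_IPI handling,
 * read the VMX capabilities, program the VMCS pin-based, processor-based and
 * entry controls, reset the vCPU and enable pass-through of the
 * syscall/sysenter and FS/GS base MSRs.
 */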
int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    hvf_reset_vcpu(cpu);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.kvm_xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

void hvf_disable(int shouldDisable)
{
    hvf_disabled = shouldDisable;
}

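/*
 * Capture event state left over from a VM exit: re-read any event that was
 * being delivered through the IDT (interrupt, NMI or exception, plus its
 * error code) and refresh the NMI/IRQ-inhibit flags from the guest
 * interruptibility field.
 */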
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

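/*
 * Main vCPU run loop: flush dirty register state into the VMCS, inject any
 * pending events, enter the guest with hv_vcpu_run() and dispatch on the
 * VM-exit reason. Loops until a handler produces an EXCP_* return code for
 * the caller.
 */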
int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    cpu->halted = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_injected = EXCP0D_GPF;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

static bool hvf_allowed;

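/*
 * Accelerator bootstrap: create the Hypervisor.framework VM, initialize the
 * memory slot table and hook the memory listener and interrupt handler into
 * QEMU.
 */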
static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    hvf_disable(0);
    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);