qemu/target/i386/hvf/hvf.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

pthread_rwlock_t mem_lock = PTHREAD_RWLOCK_INITIALIZER;
HVFState *hvf_state;
int hvf_disabled = 1;

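/* Abort with a diagnostic if a Hypervisor.framework call did not succeed. */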
static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
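/* Return the slot overlapping the guest-physical range [start, end), or NULL. */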
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

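/*
 * Push a slot's state into Hypervisor.framework: unmap a stale mapping if
 * the size changed, then hv_vm_map() the slot's memory at its guest-physical
 * start address. A slot with size 0 is simply left unmapped.
 */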
static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

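/*
 * Register (add == true) or remove the RAM behind a MemoryRegionSection as
 * an HVF memory slot. An overlapping stale slot is torn down first; when
 * adding, a free slot is claimed and mapped into the guest.
 */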
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region registered again; nothing to do. */
        }
    }

    /* Region needs to be reset. Set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

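/*
 * Mirror the APIC task priority into the vCPU's TPR register and program the
 * TPR threshold from the highest-priority pending interrupt.
 */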
void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

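/*
 * Perform the port I/O requested by an INOUT exit, one element at a time,
 * through the global address_space_io.
 */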
void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                  int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

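/*
 * Decide whether an EPT violation needs MMIO emulation. A write fault on a
 * dirty-logged RAM slot is resolved here by marking the page dirty and
 * restoring write access; only faults that hit no RAM slot (with a valid
 * guest-linear translation) are reported back for emulation.
 */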
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

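/*
 * Toggle dirty-page tracking for the slot backing a section: write-protect
 * its guest mapping to start logging, restore write access to stop.
 */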
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

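/*
 * Put the vCPU's VMCS guest state into the architectural reset state:
 * real mode, CS base 0xffff0000, RIP 0xfff0, 64 KiB flat data segments and
 * cleared general-purpose registers.
 */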
void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    cpu->halted = 0;
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

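/*
 * Create and configure the HVF vCPU: install the SIG_IPI handler, read the
 * host's VMX capabilities, program the VMCS execution/entry controls and
 * enable native handling of the common system-call and SYSENTER MSRs.
 */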
int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    hvf_reset_vcpu(cpu);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.kvm_xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

void hvf_disable(int shouldDisable)
{
    hvf_disabled = shouldDisable;
}

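/*
 * Record the event that was being delivered when the VM exit occurred (from
 * the IDT-vectoring info) and the guest's NMI/IRQ blocking state, so the
 * event can be re-injected on the next VM entry.
 */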
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

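/*
 * Main vCPU loop: flush dirty register state, inject pending events, enter
 * the guest with hv_vcpu_run() and dispatch on the VMCS exit reason until an
 * exit requires returning to the outer QEMU loop.
 */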
int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    cpu->halted = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_injected = EXCP0D_GPF;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

static bool hvf_allowed;

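/*
 * Accelerator bring-up: create the Hypervisor.framework VM, initialize the
 * memory slot table and register HVF's interrupt handler and memory listener.
 */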
static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    hvf_disable(0);
    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);