qemu/target/i386/hvf/hvf.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code in the public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
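/*
 * Return the first slot whose [start, start + size) range intersects the
 * given [start, end) range, or NULL if no registered slot overlaps it.
 */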
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

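/*
 * Push one QEMU memory slot down to Hypervisor.framework. A stale mapping
 * of a different size is unmapped first; a zero-sized slot is simply left
 * unmapped; otherwise the slot is mapped read/write/execute at its
 * guest-physical start address.
 */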
static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

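/*
 * Translate a RAM MemoryRegionSection into an hvf_slot. An overlapping slot
 * that does not match exactly is torn down first; on add, a free slot is
 * claimed and mapped via do_hvf_set_memory().
 */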
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* The same region is already registered; nothing to do. */
        }
    }

    /* The region needs to be reset: set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

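/*
 * Mirror the APIC task-priority register into the vCPU's TPR and program the
 * VMCS TPR threshold from the highest pending interrupt, so the guest traps
 * back to QEMU when it lowers TPR far enough for that interrupt to be
 * delivered.
 */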
void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

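/*
 * Forward a port I/O access to QEMU's I/O address space, one element of
 * 'size' bytes at a time, 'count' times; 'direction' is 0 for IN (read into
 * the buffer) and non-zero for OUT (write from the buffer).
 */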
void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                  int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

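/*
 * Decide whether an EPT violation needs instruction emulation. Write faults
 * on a dirty-logged slot just mark the page dirty and restore write access;
 * only data accesses that hit no RAM slot at all are reported as MMIO to be
 * emulated.
 */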
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

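/*
 * Toggle dirty logging for the slot backing 'section': write-protect the
 * mapping to start tracking writes, restore read/write access to stop.
 */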
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

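/*
 * Put the vCPU into the architectural power-on state: real mode, CS base
 * 0xffff0000 with RIP 0xfff0, and general-purpose registers cleared except
 * RDX, which holds the reset family/model/stepping identification.
 */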
void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    cpu->halted = 0;
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

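/*
 * Create and configure one Hypervisor.framework vCPU: set up the IPI signal
 * handling, allocate the emulator state, read the VMX capability MSRs,
 * program the VMCS control fields, reset the vCPU and pass the
 * syscall/sysenter MSRs through to the hardware.
 */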
int hvf_init_vcpu(CPUState *cpu)
{

    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
          0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    hvf_reset_vcpu(cpu);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

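/*
 * Record an interrupted event injection (IDT-vectoring info) and the guest's
 * NMI/interrupt shadow into env, so the pending event can be re-injected on
 * the next VM entry.
 */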
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_injected = -1;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

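/*
 * Main vCPU run loop: flush dirty register state, inject pending events,
 * enter the guest with hv_vcpu_run() and dispatch on the VM-exit reason
 * until an exit requires returning to the main loop (EXCP_HLT or
 * EXCP_INTERRUPT).
 */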
int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    cpu->halted = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
             vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
             & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_injected = EXCP0D_GPF;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

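/*
 * Accelerator init: create the VM, initialise the 32 memory slots, install
 * the HVF interrupt handler and register the memory listener that keeps
 * Hypervisor.framework mappings in sync with QEMU's RAM regions.
 */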
static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);