qemu/target/i386/hvf/hvf.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains public domain code from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "hw/boards.h"
#include "qemu/main-loop.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "target/i386/cpu.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            end > slot->start) {
            return slot;
        }
    }
    return NULL;
}

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];
#define ALIGN(x, y)  (((x) + (y) - 1) & ~((y) - 1))

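/*
 * Apply a QEMU memory slot to the Hypervisor.framework VM: drop any stale
 * mapping whose size no longer matches, then, if the slot is non-empty,
 * map it read/write/execute at its guest-physical start address.
 */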
static int do_hvf_set_memory(hvf_slot *slot)
{
    struct mac_slot *macslot;
    hv_memory_flags_t flags;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}

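/*
 * Register or unregister the RAM backing a MemoryRegionSection with HVF.
 * Any existing overlapping slot is torn down first (unless it already
 * matches exactly); on "add", a free slot is claimed and mapped.
 */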
void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;

    if (!memory_region_is_ram(area)) {
        return;
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* The same region is already registered; nothing to do. */
        }
    }

    /* Region needs to be reset: set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

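/*
 * Mirror the APIC task priority into the virtual TPR and program the VMCS
 * TPR threshold from the highest-priority pending interrupt, so the guest
 * exits when lowering its TPR would make that interrupt deliverable.
 * update_apic_tpr() propagates the value back to the APIC after an exit.
 */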
void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: need to integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}

void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

static void hvf_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;
    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

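/*
 * Perform the port I/O for an IN/OUT exit by forwarding each element to
 * QEMU's I/O address space.
 */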
void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                  int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}

/* TODO: synchronize vcpu state */
static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    if (cpu_state->vcpu_dirty == 0) {
        hvf_get_registers(cpu_state);
    }

    cpu_state->vcpu_dirty = 1;
}

void hvf_cpu_synchronize_state(CPUState *cpu_state)
{
    if (cpu_state->vcpu_dirty == 0) {
        run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUState *cpu_state = cpu;
    hvf_put_registers(cpu_state);
    cpu_state->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu_state)
{
    run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

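/*
 * Decide whether an EPT violation needs MMIO emulation.  A write fault on a
 * dirty-logged slot just marks the page dirty and re-opens the slot for
 * writes; only a data access that hit no slot at all is treated as MMIO.
 */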
static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    return !slot;
}

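/*
 * Toggle dirty-page tracking for the slot backing a section: write-protect
 * it while logging is on so writes fault into ept_emulation_fault(), and
 * restore write access when logging stops.
 */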
static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            section->offset_within_address_space + int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

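/*
 * Put the vCPU into the architectural power-on state: real mode with
 * CS base 0xffff0000 and RIP 0xfff0, flat 64 KiB data segments, and the
 * general-purpose registers cleared (RDX keeps the CPU signature).
 */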
void hvf_reset_vcpu(CPUState *cpu)
{
    /* TODO: this shouldn't be needed; there is already a call to
     * cpu_synchronize_all_post_reset in vl.c
     */
    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0);
    macvm_set_cr0(cpu->hvf_fd, 0x60000010);

    wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK);
    wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK);

    /* set VMCS guest state fields */
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0);

    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0);
    wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0);

    /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/
    wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0);

    wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0);
    wreg(cpu->hvf_fd, HV_X86_RDX, 0x623);
    wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2);
    wreg(cpu->hvf_fd, HV_X86_RSP, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RAX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RCX, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RSI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RDI, 0x0);
    wreg(cpu->hvf_fd, HV_X86_RBP, 0x0);

    for (int i = 0; i < 8; i++) {
        wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0);
    }

    hv_vm_sync_tsc(0);
    hv_vcpu_invalidate_tlb(cpu->hvf_fd);
    hv_vcpu_flush(cpu->hvf_fd);
}

void hvf_vcpu_destroy(CPUState *cpu)
{
    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

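/*
 * Create the Hypervisor.framework vCPU and configure it: set up the
 * instruction emulator and decoder, read the VMX capability MSRs, program
 * the VMCS pin/processor/entry controls, and enable native (pass-through)
 * handling of the syscall and SYSENTER MSRs.
 */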
int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_emul = g_new0(HVFX86EmulatorState, 1);

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
          0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

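/*
 * Record the interrupt or exception that was being delivered when the VM
 * exit occurred (from the IDT-vectoring info field) so it can be
 * re-injected, and refresh the NMI/interrupt-shadow flags in env->hflags.
 */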
static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_INTR_DEL_ERRCODE) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

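/*
 * Main vCPU run loop: flush dirty register state into the VMCS, inject any
 * pending events, enter the guest with hv_vcpu_run(), then dispatch on the
 * exit reason until an exit requires returning to the main loop.
 */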
int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS);
        env->eflags = RFLAGS(env);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (EFLAGS(env) & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check whether this is an MMIO or an unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, gpa);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                env->hvf_emul->fetch_rip = rip;

                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                RIP(env) += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            /* string I/O: fall back to the instruction emulator */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            RIP(env) += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            env->hvf_emul->fetch_rip = rip;

            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
             vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo
             & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

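/*
 * Accelerator init: create the HVF VM, initialise the 32 memory slots, and
 * register the memory listener and the interrupt handler.
 */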
static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    cpu_interrupt_handler = hvf_handle_interrupt;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);