qemu/target/i386/hvf/hvf.c
/* Copyright 2008 IBM Corporation
 *           2008 Red Hat, Inc.
 * Copyright 2011 Intel Corporation
 * Copyright 2016 Veertu, Inc.
 * Copyright 2017 The Android Open Source Project
 *
 * QEMU Hypervisor.framework support
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * This file contains code under public domain from the hvdos project:
 * https://github.com/mist64/hvdos
 *
 * Parts Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"

#include "sysemu/hvf.h"
#include "sysemu/runstate.h"
#include "hvf-i386.h"
#include "vmcs.h"
#include "vmx.h"
#include "x86.h"
#include "x86_descr.h"
#include "x86_mmu.h"
#include "x86_decode.h"
#include "x86_emu.h"
#include "x86_task.h"
#include "x86hvf.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>
#include <sys/sysctl.h>

#include "exec/address-spaces.h"
#include "hw/i386/apic_internal.h"
#include "qemu/main-loop.h"
#include "qemu/accel.h"
#include "target/i386/cpu.h"

#include "hvf-accel-ops.h"

HVFState *hvf_state;

static void assert_hvf_ok(hv_return_t ret)
{
    if (ret == HV_SUCCESS) {
        return;
    }

    switch (ret) {
    case HV_ERROR:
        error_report("Error: HV_ERROR");
        break;
    case HV_BUSY:
        error_report("Error: HV_BUSY");
        break;
    case HV_BAD_ARGUMENT:
        error_report("Error: HV_BAD_ARGUMENT");
        break;
    case HV_NO_RESOURCES:
        error_report("Error: HV_NO_RESOURCES");
        break;
    case HV_NO_DEVICE:
        error_report("Error: HV_NO_DEVICE");
        break;
    case HV_UNSUPPORTED:
        error_report("Error: HV_UNSUPPORTED");
        break;
    default:
        error_report("Unknown Error");
    }

    abort();
}

/* Memory slots */
hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t size)
{
    hvf_slot *slot;
    int x;
    for (x = 0; x < hvf_state->num_slots; ++x) {
        slot = &hvf_state->slots[x];
        if (slot->size && start < (slot->start + slot->size) &&
            (start + size) > slot->start) {
            return slot;
        }
    }
    return NULL;
}
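
/*
 * Two half-open ranges [a, a + len_a) and [b, b + len_b) overlap iff
 * a < b + len_b and a + len_a > b, which is the test used above. For
 * example, a lookup of [0x1ff000, 0x201000) hits a 1 MiB slot starting
 * at 0x100000, because 0x1ff000 < 0x200000 and 0x201000 > 0x100000.
 */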

struct mac_slot {
    int present;
    uint64_t size;
    uint64_t gpa_start;
    uint64_t gva;
};

struct mac_slot mac_slots[32];

static int do_hvf_set_memory(hvf_slot *slot, hv_memory_flags_t flags)
{
    struct mac_slot *macslot;
    hv_return_t ret;

    macslot = &mac_slots[slot->slot_id];

    if (macslot->present) {
        if (macslot->size != slot->size) {
            macslot->present = 0;
            ret = hv_vm_unmap(macslot->gpa_start, macslot->size);
            assert_hvf_ok(ret);
        }
    }

    if (!slot->size) {
        return 0;
    }

    macslot->present = 1;
    macslot->gpa_start = slot->start;
    macslot->size = slot->size;
    ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags);
    assert_hvf_ok(ret);
    return 0;
}
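
/*
 * mac_slots[] mirrors what has actually been mapped into the
 * Hypervisor.framework VM, while hvf_slot describes QEMU's desired
 * state; do_hvf_set_memory() reconciles the two, first tearing down a
 * stale mapping with hv_vm_unmap() whenever the size changed, and
 * treating a zero-sized slot as a pure unmap request.
 */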

void hvf_set_phys_mem(MemoryRegionSection *section, bool add)
{
    hvf_slot *mem;
    MemoryRegion *area = section->mr;
    bool writeable = !area->readonly && !area->rom_device;
    hv_memory_flags_t flags;

    if (!memory_region_is_ram(area)) {
        if (writeable) {
            return;
        } else if (!memory_region_is_romd(area)) {
            /*
             * If the memory device is not in romd_mode, then we actually want
             * to remove the hvf memory slot so all accesses will trap.
             */
            add = false;
        }
    }

    mem = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    if (mem && add) {
        if (mem->size == int128_get64(section->size) &&
            mem->start == section->offset_within_address_space &&
            mem->mem == (memory_region_get_ram_ptr(area) +
            section->offset_within_region)) {
            return; /* Same region registered again; nothing to do. */
        }
    }

    /* Region needs to be reset. Set the size to 0 and remap it. */
    if (mem) {
        mem->size = 0;
        if (do_hvf_set_memory(mem, 0)) {
            error_report("Failed to reset overlapping slot");
            abort();
        }
    }

    if (!add) {
        return;
    }

    if (area->readonly ||
        (!memory_region_is_ram(area) && memory_region_is_romd(area))) {
        flags = HV_MEMORY_READ | HV_MEMORY_EXEC;
    } else {
        flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC;
    }

    /* Now make a new slot. */
    int x;

    for (x = 0; x < hvf_state->num_slots; ++x) {
        mem = &hvf_state->slots[x];
        if (!mem->size) {
            break;
        }
    }

    if (x == hvf_state->num_slots) {
        error_report("No free slots");
        abort();
    }

    mem->size = int128_get64(section->size);
    mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region;
    mem->start = section->offset_within_address_space;
    mem->region = area;

    if (do_hvf_set_memory(mem, flags)) {
        error_report("Error registering new memory slot");
        abort();
    }
}

void vmx_update_tpr(CPUState *cpu)
{
    /* TODO: integrate APIC handling */
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
    int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);

    wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
    if (irr == -1) {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
    } else {
        wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
              irr >> 4);
    }
}
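
/*
 * Worked example of the threshold logic above: the APIC TPR is a 4-bit
 * priority class, so an APIC TPR of 5 is written to HV_X86_TPR as 0x50.
 * If the highest pending vector (irr) is 0x61, irr > tpr and the
 * threshold becomes tpr >> 4 = 5, letting the pending vector through;
 * if irr were 0x41, the threshold would be irr >> 4 = 4, which should
 * trigger a TPR-below-threshold exit once the guest lowers its task
 * priority far enough for vector 0x41 to become deliverable.
 */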

static void update_apic_tpr(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
}

#define VECTORING_INFO_VECTOR_MASK     0xff

void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer,
                  int direction, int size, int count)
{
    int i;
    uint8_t *ptr = buffer;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
                         ptr, size,
                         direction);
        ptr += size;
    }
}
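
/*
 * Note that 'direction' is forwarded to address_space_rw() as its
 * is_write argument: 0 performs an IN (device -> buffer) and 1 an OUT
 * (buffer -> device), matching how the EXIT_REASON_INOUT handler below
 * calls this helper.
 */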

static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (!cpu->vcpu_dirty) {
        hvf_get_registers(cpu);
        cpu->vcpu_dirty = true;
    }
}

void hvf_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
                                             run_on_cpu_data arg)
{
    hvf_put_registers(cpu);
    cpu->vcpu_dirty = false;
}

void hvf_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
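
/*
 * The vcpu_dirty flag implements a simple write-back protocol between
 * QEMU's CPUState and the HVF vCPU: vcpu_dirty == true means QEMU's
 * copy of the register state is authoritative and must be pushed with
 * hvf_put_registers() before the next hv_vcpu_run(); vcpu_dirty ==
 * false means the kernel copy is current and must be pulled with
 * hvf_get_registers() before QEMU may inspect or modify it.
 */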

static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual)
{
    int read, write;

    /* EPT fault on an instruction fetch doesn't make sense here */
    if (ept_qual & EPT_VIOLATION_INST_FETCH) {
        return false;
    }

    /* EPT fault must be a read fault or a write fault */
    read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
    write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
    if ((read | write) == 0) {
        return false;
    }

    if (write && slot) {
        if (slot->flags & HVF_SLOT_LOG) {
            memory_region_set_dirty(slot->region, gpa - slot->start, 1);
            hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                          HV_MEMORY_READ | HV_MEMORY_WRITE);
        }
    }

    /*
     * The EPT violation must have been caused by accessing a
     * guest-physical address that is a translation of a guest-linear
     * address.
     */
    if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
        (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
        return false;
    }

    if (!slot) {
        return true;
    }
    if (!memory_region_is_ram(slot->region) &&
        !(read && memory_region_is_romd(slot->region))) {
        return true;
    }
    return false;
}
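
/*
 * In summary, ept_emulation_fault() returns true when the access must
 * be emulated instruction by instruction (an unmapped GPA, i.e. MMIO,
 * or a write to a ROM-device region) and false when re-entering the
 * guest suffices, notably for dirty-logging write faults, which are
 * resolved above by marking the page dirty and restoring write access.
 */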

static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on)
{
    hvf_slot *slot;

    slot = hvf_find_overlap_slot(
            section->offset_within_address_space,
            int128_get64(section->size));

    /* protect region against writes; begin tracking it */
    if (on) {
        slot->flags |= HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ);
    /* stop tracking region */
    } else {
        slot->flags &= ~HVF_SLOT_LOG;
        hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size,
                      HV_MEMORY_READ | HV_MEMORY_WRITE);
    }
}
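
/*
 * Dirty-page tracking thus works by write-protecting the slot with
 * hv_vm_protect(): the first guest write to the protected range takes
 * an EPT fault into ept_emulation_fault(), which records the page as
 * dirty and re-enables HV_MEMORY_WRITE so later writes run at full
 * speed until the next hvf_log_sync() cycle re-protects the slot.
 */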

static void hvf_log_start(MemoryListener *listener,
                          MemoryRegionSection *section, int old, int new)
{
    if (old != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 1);
}

static void hvf_log_stop(MemoryListener *listener,
                         MemoryRegionSection *section, int old, int new)
{
    if (new != 0) {
        return;
    }

    hvf_set_dirty_tracking(section, 0);
}

static void hvf_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    /*
     * sync of dirty pages is handled elsewhere; just make sure we keep
     * tracking the region.
     */
    hvf_set_dirty_tracking(section, 1);
}

static void hvf_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, true);
}

static void hvf_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hvf_set_phys_mem(section, false);
}

static MemoryListener hvf_memory_listener = {
    .priority = 10,
    .region_add = hvf_region_add,
    .region_del = hvf_region_del,
    .log_start = hvf_log_start,
    .log_stop = hvf_log_stop,
    .log_sync = hvf_log_sync,
};

void hvf_vcpu_destroy(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd);
    g_free(env->hvf_mmio_buf);
    assert_hvf_ok(ret);
}

static void dummy_signal(int sig)
{
}

static void init_tsc_freq(CPUX86State *env)
{
    size_t length;
    uint64_t tsc_freq;

    if (env->tsc_khz != 0) {
        return;
    }

    length = sizeof(uint64_t);
    if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) {
        return;
    }
    env->tsc_khz = tsc_freq / 1000;  /* Hz to kHz */
}

static void init_apic_bus_freq(CPUX86State *env)
{
    size_t length;
    uint64_t bus_freq;

    if (env->apic_bus_freq != 0) {
        return;
    }

    length = sizeof(uint64_t);
    if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) {
        return;
    }
    env->apic_bus_freq = bus_freq;
}

static inline bool tsc_is_known(CPUX86State *env)
{
    return env->tsc_khz != 0;
}

static inline bool apic_bus_freq_is_known(CPUX86State *env)
{
    return env->apic_bus_freq != 0;
}

int hvf_init_vcpu(CPUState *cpu)
{
    X86CPU *x86cpu = X86_CPU(cpu);
    CPUX86State *env = &x86cpu->env;
    int r;

    /* init cpu signals */
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);

    init_emu();
    init_decoder();

    hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
    env->hvf_mmio_buf = g_new(char, 4096);

    if (x86cpu->vmware_cpuid_freq) {
        init_tsc_freq(env);
        init_apic_bus_freq(env);

        if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) {
            error_report("vmware-cpuid-freq: feature couldn't be enabled");
        }
    }

    r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
    cpu->vcpu_dirty = 1;
    assert_hvf_ok(r);

    if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED,
        &hvf_state->hvf_caps->vmx_cap_pinbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED,
        &hvf_state->hvf_caps->vmx_cap_procbased)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2,
        &hvf_state->hvf_caps->vmx_cap_procbased2)) {
        abort();
    }
    if (hv_vmx_read_capability(HV_VMX_CAP_ENTRY,
        &hvf_state->hvf_caps->vmx_cap_entry)) {
        abort();
    }

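    /*
     * cap2ctrl(), a helper defined in this directory's vmx.h, masks the
     * requested control bits by the capability MSR just read: per the
     * Intel SDM's VMX capability MSR layout, the low 32 bits give the
     * controls that must be 1 and the high 32 bits the controls that
     * may be 1, so (ctrl | low) & high yields a setting the hardware
     * will accept.
     */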
    /* set VMCS control fields */
    wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
          VMCS_PIN_BASED_CTLS_EXTINT |
          VMCS_PIN_BASED_CTLS_NMI |
          VMCS_PIN_BASED_CTLS_VNMI));
    wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
          VMCS_PRI_PROC_BASED_CTLS_HLT |
          VMCS_PRI_PROC_BASED_CTLS_MWAIT |
          VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
          VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
          VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
    wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
                   VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));

    wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS,
          cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0));
    wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */

    wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);

    x86cpu = X86_CPU(cpu);
    x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);

    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1);
    hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1);

    return 0;
}

static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;

    env->exception_nr = -1;
    env->exception_pending = 0;
    env->exception_injected = 0;
    env->interrupt_injected = -1;
    env->nmi_injected = false;
    env->ins_len = 0;
    env->has_error_code = false;
    if (idtvec_info & VMCS_IDT_VEC_VALID) {
        switch (idtvec_info & VMCS_IDT_VEC_TYPE) {
        case VMCS_IDT_VEC_HWINTR:
        case VMCS_IDT_VEC_SWINTR:
            env->interrupt_injected = idtvec_info & VMCS_IDT_VEC_VECNUM;
            break;
        case VMCS_IDT_VEC_NMI:
            env->nmi_injected = true;
            break;
        case VMCS_IDT_VEC_HWEXCEPTION:
        case VMCS_IDT_VEC_SWEXCEPTION:
            env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM;
            env->exception_injected = 1;
            break;
        case VMCS_IDT_VEC_PRIV_SWEXCEPTION:
        default:
            abort();
        }
        if ((idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWEXCEPTION ||
            (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) {
            env->ins_len = ins_len;
        }
        if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
            env->has_error_code = true;
            env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR);
        }
    }
    if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
        VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) {
        env->hflags2 |= HF2_NMI_MASK;
    } else {
        env->hflags2 &= ~HF2_NMI_MASK;
    }
    if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) &
         (VMCS_INTERRUPTIBILITY_STI_BLOCKING |
         VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) {
        env->hflags |= HF_INHIBIT_IRQ_MASK;
    } else {
        env->hflags &= ~HF_INHIBIT_IRQ_MASK;
    }
}

static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                              uint32_t *eax, uint32_t *ebx,
                              uint32_t *ecx, uint32_t *edx)
{
    /*
     * A wrapper that extends cpu_x86_cpuid with the 0x40000000 and
     * 0x40000010 leaves; leaves 0x40000001-0x4000000F are filled with
     * zeros. This provides vmware-cpuid-freq support to hvf.
     *
     * Note: leaf 0x40000000 does not expose HVF, leaving the hypervisor
     * signature empty.
     */

    if (index < 0x40000000 || index > 0x40000010 ||
        !tsc_is_known(env) || !apic_bus_freq_is_known(env)) {

        cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx);
        return;
    }

    switch (index) {
    case 0x40000000:
        *eax = 0x40000010;    /* Max available cpuid leaf */
        *ebx = 0;             /* Leave signature empty */
        *ecx = 0;
        *edx = 0;
        break;
    case 0x40000010:
        *eax = env->tsc_khz;
        *ebx = env->apic_bus_freq / 1000; /* Hz to kHz */
        *ecx = 0;
        *edx = 0;
        break;
    default:
        *eax = 0;
        *ebx = 0;
        *ecx = 0;
        *edx = 0;
        break;
    }
}
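
/*
 * Illustrative values: with env->tsc_khz == 2592000 and
 * env->apic_bus_freq == 100000000, a guest executing CPUID with
 * EAX = 0x40000010 receives EAX = 2592000 (TSC rate in kHz) and
 * EBX = 100000 (APIC bus clock in kHz), the layout VMware-aware
 * guests expect from the vmware-cpuid-freq interface.
 */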

int hvf_vcpu_exec(CPUState *cpu)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
    CPUX86State *env = &x86_cpu->env;
    int ret = 0;
    uint64_t rip = 0;

    if (hvf_process_events(cpu)) {
        return EXCP_HLT;
    }

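    /*
     * Run loop: each iteration flushes dirty register state, injects
     * any pending events, enters the guest with hv_vcpu_run(), and
     * dispatches on the exit reason. ret stays 0 for exits handled
     * entirely here, so the guest is re-entered; a non-zero ret (e.g.
     * EXCP_INTERRUPT or EXCP_HLT) returns control to the caller.
     */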
    do {
        if (cpu->vcpu_dirty) {
            hvf_put_registers(cpu);
            cpu->vcpu_dirty = false;
        }

        if (hvf_inject_interrupts(cpu)) {
            return EXCP_INTERRUPT;
        }
        vmx_update_tpr(cpu);

        qemu_mutex_unlock_iothread();
        if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) {
            qemu_mutex_lock_iothread();
            return EXCP_HLT;
        }

        hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
        assert_hvf_ok(r);

        /* handle VMEXIT */
        uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON);
        uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION);
        uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd,
                                           VMCS_EXIT_INSTRUCTION_LENGTH);

        uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);

        hvf_store_events(cpu, ins_len, idtvec_info);
        rip = rreg(cpu->hvf_fd, HV_X86_RIP);
        env->eflags = rreg(cpu->hvf_fd, HV_X86_RFLAGS);

        qemu_mutex_lock_iothread();

        update_apic_tpr(cpu);
        current_cpu = cpu;

        ret = 0;
        switch (exit_reason) {
        case EXIT_REASON_HLT: {
            macvm_set_rip(cpu, rip + ins_len);
            if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                (env->eflags & IF_MASK))
                && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) &&
                !(idtvec_info & VMCS_IDT_VEC_VALID)) {
                cpu->halted = 1;
                ret = EXCP_HLT;
                break;
            }
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_MWAIT: {
            ret = EXCP_INTERRUPT;
            break;
        }
        /* Need to check if MMIO or unmapped fault */
        case EXIT_REASON_EPT_FAULT:
        {
            hvf_slot *slot;
            uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS);

            if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) &&
                ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) {
                vmx_set_nmi_blocking(cpu);
            }

            slot = hvf_find_overlap_slot(gpa, 1);
            /* mmio */
            if (ept_emulation_fault(slot, gpa, exit_qual)) {
                struct x86_decode decode;

                load_regs(cpu);
                decode_instruction(env, &decode);
                exec_instruction(env, &decode);
                store_regs(cpu);
                break;
            }
            break;
        }
        case EXIT_REASON_INOUT:
        {
            uint32_t in = (exit_qual & 8) != 0;
            uint32_t size = (exit_qual & 7) + 1;
            uint32_t string = (exit_qual & 16) != 0;
            uint32_t port = exit_qual >> 16;
            /*uint32_t rep = (exit_qual & 0x20) != 0;*/
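            /*
             * Per the Intel SDM, the I/O exit qualification encodes
             * bits 2:0 as (access size - 1), bit 3 as the direction
             * (1 = IN), bit 4 as a string instruction (INS/OUTS),
             * bit 5 as a REP prefix, and bits 31:16 as the port
             * number, which is what the decodes above extract.
             */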

            if (!string && in) {
                uint64_t val = 0;
                load_regs(cpu);
                hvf_handle_io(env, port, &val, 0, size, 1);
                if (size == 1) {
                    AL(env) = val;
                } else if (size == 2) {
                    AX(env) = val;
                } else if (size == 4) {
                    RAX(env) = (uint32_t)val;
                } else {
                    RAX(env) = (uint64_t)val;
                }
                env->eip += ins_len;
                store_regs(cpu);
                break;
            } else if (!string && !in) {
                RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX);
                hvf_handle_io(env, port, &RAX(env), 1, size, 1);
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            assert(ins_len == decode.len);
            exec_instruction(env, &decode);
            store_regs(cpu);

            break;
        }
        case EXIT_REASON_CPUID: {
            uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX);
            uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (rax == 1) {
                /* CPUID1.ecx.OSXSAVE needs to know CR4 */
                env->cr[4] = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4);
            }
            hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx);

            wreg(cpu->hvf_fd, HV_X86_RAX, rax);
            wreg(cpu->hvf_fd, HV_X86_RBX, rbx);
            wreg(cpu->hvf_fd, HV_X86_RCX, rcx);
            wreg(cpu->hvf_fd, HV_X86_RDX, rdx);

            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_XSETBV: {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUX86State *env = &x86_cpu->env;
            uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX);
            uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX);
            uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX);

            if (ecx) {
                macvm_set_rip(cpu, rip + ins_len);
                break;
            }
            env->xcr0 = ((uint64_t)edx << 32) | eax;
            wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        }
        case EXIT_REASON_INTR_WINDOW:
            vmx_clear_int_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_NMI_WINDOW:
            vmx_clear_nmi_window_exiting(cpu);
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_EXT_INTR:
            /* force exit and allow io handling */
            ret = EXCP_INTERRUPT;
            break;
        case EXIT_REASON_RDMSR:
        case EXIT_REASON_WRMSR:
        {
            load_regs(cpu);
            if (exit_reason == EXIT_REASON_RDMSR) {
                simulate_rdmsr(cpu);
            } else {
                simulate_wrmsr(cpu);
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_CR_ACCESS: {
            int cr;
            int reg;

            load_regs(cpu);
            cr = exit_qual & 15;
            reg = (exit_qual >> 8) & 15;

            switch (cr) {
            case 0x0: {
                macvm_set_cr0(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 4: {
                macvm_set_cr4(cpu->hvf_fd, RRX(env, reg));
                break;
            }
            case 8: {
                X86CPU *x86_cpu = X86_CPU(cpu);
                if (exit_qual & 0x10) {
                    RRX(env, reg) = cpu_get_apic_tpr(x86_cpu->apic_state);
                } else {
                    int tpr = RRX(env, reg);
                    cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
                    ret = EXCP_INTERRUPT;
                }
                break;
            }
            default:
                error_report("Unrecognized CR %d", cr);
                abort();
            }
            env->eip += ins_len;
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_APIC_ACCESS: { /* TODO */
            struct x86_decode decode;

            load_regs(cpu);
            decode_instruction(env, &decode);
            exec_instruction(env, &decode);
            store_regs(cpu);
            break;
        }
        case EXIT_REASON_TPR: {
            ret = 1;
            break;
        }
        case EXIT_REASON_TASK_SWITCH: {
            uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO);
            x68_segment_selector sel = {.sel = exit_qual & 0xffff};
            vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3,
                                   vinfo & VMCS_INTR_VALID,
                                   vinfo & VECTORING_INFO_VECTOR_MASK,
                                   vinfo & VMCS_INTR_T_MASK);
            break;
        }
        case EXIT_REASON_TRIPLE_FAULT: {
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            ret = EXCP_INTERRUPT;
            break;
        }
        case EXIT_REASON_RDPMC:
            wreg(cpu->hvf_fd, HV_X86_RAX, 0);
            wreg(cpu->hvf_fd, HV_X86_RDX, 0);
            macvm_set_rip(cpu, rip + ins_len);
            break;
        case VMX_REASON_VMCALL:
            env->exception_nr = EXCP0D_GPF;
            env->exception_injected = 1;
            env->has_error_code = true;
            env->error_code = 0;
            break;
        default:
            error_report("%llx: unhandled exit %llx", rip, exit_reason);
        }
    } while (ret == 0);

    return ret;
}

bool hvf_allowed;

static int hvf_accel_init(MachineState *ms)
{
    int x;
    hv_return_t ret;
    HVFState *s;

    ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret);

    s = g_new0(HVFState, 1);

    s->num_slots = 32;
    for (x = 0; x < s->num_slots; ++x) {
        s->slots[x].size = 0;
        s->slots[x].slot_id = x;
    }

    hvf_state = s;
    memory_listener_register(&hvf_memory_listener, &address_space_memory);
    return 0;
}

static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HVF";
    ac->init_machine = hvf_accel_init;
    ac->allowed = &hvf_allowed;
}

static const TypeInfo hvf_accel_type = {
    .name = TYPE_HVF_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = hvf_accel_class_init,
};

static void hvf_type_init(void)
{
    type_register_static(&hvf_accel_type);
}

type_init(hvf_type_init);