qemu/target/i386/hax-all.c
/*
 * QEMU HAX support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * Copyright (c) 2011 Intel Corporation
 *  Written by:
 *  Jiang Yunhong<yunhong.jiang@intel.com>
 *  Xin Xiaohui<xiaohui.xin@intel.com>
 *  Zhang Xiantao<xiantao.zhang@intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * HAX common code for both Windows and Darwin
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"

#include "qemu-common.h"
#include "hax-i386.h"
#include "sysemu/accel.h"
#include "sysemu/sysemu.h"
#include "qemu/main-loop.h"
#include "hw/boards.h"

#define DEBUG_HAX 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* Current version */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

static bool hax_allowed;

struct hax_state hax_global;

static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);

int hax_enabled(void)
{
    return hax_allowed;
}

int valid_hax_tunnel_size(uint16_t size)
{
    return size >= sizeof(struct hax_tunnel);
}

hax_fd hax_vcpu_get_fd(CPUArchState *env)
{
    struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
    if (!vcpu) {
        return HAX_INVALID_FD;
    }
    return vcpu->fd;
}

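/*
 * Query the HAX driver for its capabilities and verify that this host can
 * run guests: VT-x (and NX) must be enabled, Unrestricted Guest (UG) mode
 * must be supported, and, if the driver enforces a memory quota, the quota
 * must cover the guest RAM size recorded in hax->mem_quota.
 */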
static int hax_get_capability(struct hax_state *hax)
{
    int ret;
    struct hax_capabilityinfo capinfo, *cap = &capinfo;

    ret = hax_capability(hax, cap);
    if (ret) {
        return ret;
    }

    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
            DPRINTF("VT-x is not enabled, the HAX driver will not work.\n");
        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
            DPRINTF("NX is not enabled, the HAX driver will not work.\n");
        }
        return -ENXIO;
    }

    if (!(cap->winfo & HAX_CAP_UG)) {
        fprintf(stderr, "UG mode is not supported by the hardware.\n");
        return -ENOTSUP;
    }

    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);

    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
        if (cap->mem_quota < hax->mem_quota) {
            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
            return -ENOSPC;
        }
    }
    return 0;
}

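/*
 * Two-way version handshake with the HAX kernel module: the module must be
 * at least hax_min_version, and our API version (hax_cur_version) must be
 * at least the module's compat_version. Returns 1 if compatible, 0 if not.
 */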
static int hax_version_support(struct hax_state *hax)
{
    int ret;
    struct hax_module_version version;

    ret = hax_mod_version(hax, &version);
    if (ret < 0) {
        return 0;
    }

    if (hax_min_version > version.cur_version) {
        fprintf(stderr, "Incompatible HAX module version %d, "
                "requires minimum version %d\n",
                version.cur_version, hax_min_version);
        return 0;
    }
    if (hax_cur_version < version.compat_version) {
        fprintf(stderr, "Incompatible QEMU HAX API version %x, "
                "requires minimum HAX API version %x\n",
                hax_cur_version, version.compat_version);
        return 0;
    }

    return 1;
}

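/*
 * Create vcpu 'id' for the current VM: allocate the state structure, ask
 * the HAX driver to create the vcpu, open a host fd for it, and map the
 * communication tunnel (hax_host_setup_vcpu_channel) used to exchange exit
 * status and I/O data with the kernel module.
 */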
int hax_vcpu_create(int id)
{
    struct hax_vcpu_state *vcpu = NULL;
    int ret;

    if (!hax_global.vm) {
        fprintf(stderr, "Failed to create vcpu %x, vm is null\n", id);
        return -1;
    }

    if (hax_global.vm->vcpus[id]) {
        fprintf(stderr, "vcpu %x is already allocated\n", id);
        return 0;
    }

    vcpu = g_new0(struct hax_vcpu_state, 1);

    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    if (ret) {
        fprintf(stderr, "Failed to create vcpu %x\n", id);
        goto error;
    }

    vcpu->vcpu_id = id;
    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    if (hax_invalid_fd(vcpu->fd)) {
        fprintf(stderr, "Failed to open the vcpu\n");
        ret = -ENODEV;
        goto error;
    }

    hax_global.vm->vcpus[id] = vcpu;

    ret = hax_host_setup_vcpu_channel(vcpu);
    if (ret) {
        fprintf(stderr, "Invalid hax tunnel size\n");
        ret = -EINVAL;
        goto error;
    }
    return 0;

  error:
    /* vcpu and tunnel will be closed automatically */
    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
        hax_close_fd(vcpu->fd);
    }

    hax_global.vm->vcpus[id] = NULL;
    g_free(vcpu);
    return -1;
}

int hax_vcpu_destroy(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return 0;
    }

    if (!hax_global.vm) {
        fprintf(stderr, "Failed to destroy vcpu %x, vm is null\n",
                vcpu->vcpu_id);
        return -1;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. Closing the fd causes the HAX module to clean up the vcpu
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    g_free(vcpu);
    return 0;
}

int hax_init_vcpu(CPUState *cpu)
{
    int ret;

    ret = hax_vcpu_create(cpu->cpu_index);
    if (ret < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));

    return ret;
}

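/*
 * Create (or return the already existing) VM object. A VM is first created
 * in the HAX driver, which hands back an id; the id is then used to open a
 * host fd through which all per-VM ioctls are issued.
 */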
struct hax_vm *hax_vm_create(struct hax_state *hax)
{
    struct hax_vm *vm;
    int vm_id = 0, ret;

    if (hax_invalid_fd(hax->fd)) {
        return NULL;
    }

    if (hax->vm) {
        return hax->vm;
    }

    vm = g_new0(struct hax_vm, 1);

    ret = hax_host_create_vm(hax, &vm_id);
    if (ret) {
        fprintf(stderr, "Failed to create vm %x\n", ret);
        goto error;
    }
    vm->id = vm_id;
    vm->fd = hax_host_open_vm(hax, vm_id);
    if (hax_invalid_fd(vm->fd)) {
        fprintf(stderr, "Failed to open vm %d\n", vm_id);
        goto error;
    }

    hax->vm = vm;
    return vm;

  error:
    g_free(vm);
    hax->vm = NULL;
    return NULL;
}

int hax_vm_destroy(struct hax_vm *vm)
{
    int i;

    for (i = 0; i < HAX_MAX_VCPU; i++) {
        if (vm->vcpus[i]) {
            fprintf(stderr, "vcpus must be destroyed before the vm\n");
            return -1;
        }
    }
    hax_close_fd(vm->fd);
    g_free(vm);
    hax_global.vm = NULL;
    return 0;
}

static void hax_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;

    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

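/*
 * One-time accelerator initialization: open the HAX device, check
 * capabilities and version compatibility, create the VM, register guest
 * memory with the driver (hax_memory_init), report our API version to the
 * kernel module, and install the HAX interrupt handler hook.
 */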
static int hax_init(ram_addr_t ram_size)
{
    struct hax_state *hax = NULL;
    struct hax_qemu_version qversion;
    int ret;

    hax = &hax_global;

    memset(hax, 0, sizeof(struct hax_state));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);

    if (ret) {
        if (ret != -ENOSPC) {
            ret = -EINVAL;
        }
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);
    cpu_interrupt_handler = hax_handle_interrupt;

    return ret;
  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}

static int hax_accel_init(MachineState *ms)
{
    int ret = hax_init(ms->ram_size);

    if (ret && (ret != -ENOSPC)) {
        fprintf(stderr, "No accelerator found.\n");
    } else {
        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
                !ret ? "working" : "not working",
                !ret ? "fast virt" : "emulation");
    }
    return ret;
}

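/*
 * Complete a fast MMIO exit on behalf of the guest. The hax_fastmmio
 * descriptor is filled in by the kernel module and delivered through the
 * vcpu's I/O buffer; direction 0/1 is a read/write at hft->gpa, and
 * direction 2 (API v4) is a memory-to-memory move from gpa to gpa2.
 */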
static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    if (hft->direction < 2) {
        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
                               hft->direction);
    } else {
        /*
         * HAX API v4 supports transferring data between two MMIO addresses,
         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
         *  hft->direction == 2: gpa ==> gpa2
         */
        uint64_t value;
        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
        cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
    }

    return 0;
}

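/*
 * Complete a port I/O exit. For string instructions (INS/OUTS with REP),
 * 'count' elements of 'size' bytes each sit in the vcpu's I/O buffer; the
 * DF flag ('df') selects whether the buffer is walked forwards or, for
 * reverse string copies, backwards from the last element.
 */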
static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    uint8_t *ptr;
    int i;
    MemTxAttrs attrs = { 0 };

    if (!df) {
        ptr = (uint8_t *) buffer;
    } else {
        ptr = buffer + size * count - size;
    }
    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);
        if (!df) {
            ptr += size;
        } else {
            ptr -= size;
        }
    }

    return 0;
}

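/*
 * Inject a pending external interrupt if the kernel module reports that
 * the guest can take one now; otherwise request an interrupt-window exit
 * via the tunnel, so the next hax_vcpu_run() returns as soon as injection
 * becomes possible.
 */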
static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    /*
     * Try to inject an interrupt if the guest can accept it.
     * Unlike KVM, the HAX kernel module checks eflags, rather than QEMU.
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq;

        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        ht->request_interrupt_window = 1;
    } else {
        ht->request_interrupt_window = 0;
    }
    return 0;
}

void hax_raise_event(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return;
    }
    vcpu->tunnel->user_event_pending = 1;
}

/*
 * Ask the HAX kernel module to run the vcpu for us until:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to execute vcpu at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply retry hax_vcpu_run() if the system call was interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                                ht->pio._direction, ht->pio._size,
                                ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* No pending interrupt or NMI: halt until one arrives.
                 * Setting IF_MASK compensates for possibly stale
                 * env->eflags (see the halted-state comment above). */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* These exit statuses simply return control to the HAX module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}

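/*
 * Register synchronization between QEMU and the HAX kernel module uses the
 * cpu->vcpu_dirty flag: when set, QEMU's copy of the state is authoritative
 * and must be pushed to the kernel before the vcpu runs again; when clear,
 * the kernel's copy is current and must be pulled before QEMU inspects it.
 * The helpers below run on the vcpu thread via run_on_cpu().
 */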
static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_arch_get_registers(env);
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

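/*
 * Outer vcpu loop, called from the vcpu thread: keep re-entering the
 * kernel module until an exit that QEMU must act on (EXCP_HLT,
 * EXCP_INTERRUPT, ...) is pending in cpu->exception_index. A fatal return
 * from hax_vcpu_hax_exec() aborts the emulator.
 */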
int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
    int fatal;
    int ret;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = hax_vcpu_hax_exec(env);

        if (fatal) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }

    return ret;
}

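/*
 * Conversion helpers between QEMU's SegmentCache (selector/base/limit plus
 * a packed flags word) and HAXM's segment_desc_t bitfields. set_v8086_seg()
 * builds the fixed descriptor attributes that virtual-8086 mode requires
 * (type 3, DPL 3, present, 16-bit).
 */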
static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->operand_size = 0;
    lhs->desc = 1;
    lhs->long_mode = 0;
    lhs->granularity = 0;
    lhs->available = 0;
}

static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->long_mode << DESC_L_SHIFT)
        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
}

static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    lhs->desc = (flags & DESC_S_MASK) != 0;
    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    lhs->granularity = (flags & DESC_G_MASK) != 0;
    lhs->available = (flags & DESC_AVL_MASK) != 0;
}

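/*
 * Copy one register between the HAX state buffer and CPUArchState in the
 * direction selected by 'set': set != 0 pushes the QEMU value to the HAX
 * buffer, set == 0 pulls the HAX value into QEMU.
 */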
static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    target_ulong reg = *hax_reg;

    if (set) {
        *hax_reg = *qemu_reg;
    } else {
        *qemu_reg = reg;
    }
}

/* sregs must already have been synced with the HAX kernel before this call */
static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    get_seg(&env->segs[R_CS], &sregs->_cs);
    get_seg(&env->segs[R_DS], &sregs->_ds);
    get_seg(&env->segs[R_ES], &sregs->_es);
    get_seg(&env->segs[R_FS], &sregs->_fs);
    get_seg(&env->segs[R_GS], &sregs->_gs);
    get_seg(&env->segs[R_SS], &sregs->_ss);

    get_seg(&env->tr, &sregs->_tr);
    get_seg(&env->ldt, &sregs->_ldt);
    env->idt.limit = sregs->_idt.limit;
    env->idt.base = sregs->_idt.base;
    env->gdt.limit = sregs->_gdt.limit;
    env->gdt.base = sregs->_gdt.base;
    return 0;
}

static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs->_cs, &env->segs[R_CS]);
        set_seg(&sregs->_ds, &env->segs[R_DS]);
        set_seg(&sregs->_es, &env->segs[R_ES]);
        set_seg(&sregs->_fs, &env->segs[R_FS]);
        set_seg(&sregs->_gs, &env->segs[R_GS]);
        set_seg(&sregs->_ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                                  (sregs->_cs.selector & 3);
            sregs->_ss.dpl = sregs->_ss.selector & 3;
        }
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);
    sregs->_idt.limit = env->idt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_gdt.base = env->gdt.base;
    return 0;
}

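/*
 * Transfer the complete register state (GPRs, rflags/rip, control
 * registers and segments) between QEMU and the kernel module in one
 * vcpu_state_t buffer; 'set' selects the direction, as in hax_getput_reg().
 */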
static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;
    int ret;
    memset(&regs, 0, sizeof(struct vcpu_state_t));

    if (!set) {
        ret = hax_sync_vcpu_state(env, &regs, 0);
        if (ret < 0) {
            return -1;
        }
    }

    /* general-purpose registers */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (set) {
        regs._cr0 = env->cr[0];
        regs._cr2 = env->cr[2];
        regs._cr3 = env->cr[3];
        regs._cr4 = env->cr[4];
        hax_set_segments(env, &regs);
    } else {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
    }

    if (set) {
        ret = hax_sync_vcpu_state(env, &regs, 1);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
                              uint64_t value)
{
    item->entry = index;
    item->value = value;
}

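/*
 * MSR synchronization. A hax_msr_data batch holds the MSRs QEMU tracks
 * (SYSENTER, TSC, and the 64-bit syscall MSRs on TARGET_X86_64); md.done
 * reports how many entries the kernel module actually processed.
 */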
static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        }
    }

    return 0;
}

static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int n = 0;

    memset(&md, 0, sizeof(struct hax_msr_data));
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    md.nr_msr = n;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}

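/*
 * FPU/SSE state is exchanged through a struct fx_layout, which mirrors the
 * FXSAVE area: x87 control/status/tag words, the ST/MM register file, and
 * the first 16 XMM registers split across the mmx_1/mmx_2 arrays.
 */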
static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i, ret;

    ret = hax_sync_fpu(env, &fpu, 0);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftw >> i) & 1);
    }
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;

    for (i = 0; i < 8; ++i) {
        fpu.ftw |= (!env->fptags[i]) << i;
    }

    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < 8; i++) {
        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    fpu.mxcsr = env->mxcsr;

    return hax_sync_fpu(env, &fpu, 1);
}

static int hax_arch_get_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}

static int hax_arch_set_registers(CPUArchState *env)
{
    int ret;
    ret = hax_sync_vcpu_register(env, 1);

    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu registers\n");
        return ret;
    }
    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to sync FPU state\n");
        return ret;
    }
    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to sync MSRs\n");
        return ret;
    }

    return 0;
}

static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (hax_enabled()) {
        if (modified) {
            hax_arch_set_registers(env);
        } else {
            hax_arch_get_registers(env);
        }
    }
}

/*
 * Much simpler than KVM, at least at this first stage: we need not consider
 * device pass-through or the framebuffer, and we may even be able to drop
 * the BIOS entirely.
 */
int hax_sync_vcpus(void)
{
    if (hax_enabled()) {
        CPUState *cpu;

        cpu = first_cpu;
        if (!cpu) {
            return 0;
        }

        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
            int ret;

            ret = hax_arch_set_registers(cpu->env_ptr);
            if (ret < 0) {
                return ret;
            }
        }
    }

    return 0;
}

void hax_reset_vcpu_state(void *opaque)
{
    CPUState *cpu;
    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
        cpu->hax_vcpu->tunnel->user_event_pending = 0;
        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
    }
}

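/*
 * QOM boilerplate: register HAX as an accelerator type so it can be
 * selected on the command line (e.g. -accel hax); hax_accel_init() then
 * runs at machine initialization time.
 */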
static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HAX";
    ac->init_machine = hax_accel_init;
    ac->allowed = &hax_allowed;
}

static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};

static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

type_init(hax_type_init);