qemu/target/i386/hax-all.c
/*
 * QEMU HAX support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * Copyright (c) 2011 Intel Corporation
 *  Written by:
 *  Jiang Yunhong <yunhong.jiang@intel.com>
 *  Xin Xiaohui <xiaohui.xin@intel.com>
 *  Zhang Xiantao <xiantao.zhang@intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * HAX common code for both Windows and Darwin
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"

#include "qemu-common.h"
#include "hax-i386.h"
#include "sysemu/accel.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "qemu/main-loop.h"
#include "hw/boards.h"

#define DEBUG_HAX 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* Current version */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

static bool hax_allowed;

struct hax_state hax_global;

static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);

int hax_enabled(void)
{
    return hax_allowed;
}

int valid_hax_tunnel_size(uint16_t size)
{
    return size >= sizeof(struct hax_tunnel);
}

hax_fd hax_vcpu_get_fd(CPUArchState *env)
{
    struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
    if (!vcpu) {
        return HAX_INVALID_FD;
    }
    return vcpu->fd;
}

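/*
 * Query the driver's capability bits and check them against what this
 * build of QEMU requires: a usable VT-x/NX setup, Unrestricted Guest
 * (UG) mode, and, if the driver enforces one, a memory quota large
 * enough for the VM.
 */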
static int hax_get_capability(struct hax_state *hax)
{
    int ret;
    struct hax_capabilityinfo capinfo, *cap = &capinfo;

    ret = hax_capability(hax, cap);
    if (ret) {
        return ret;
    }

    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
            DPRINTF("VT-x feature is not enabled, HAX driver will not work.\n");
        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
            DPRINTF("NX feature is not enabled, HAX driver will not work.\n");
        }
        return -ENXIO;
    }

    if (!(cap->winfo & HAX_CAP_UG)) {
        fprintf(stderr, "UG mode is not supported by the hardware.\n");
        return -ENOTSUP;
    }

    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);

    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
        if (cap->mem_quota < hax->mem_quota) {
            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
            return -ENOSPC;
        }
    }
    return 0;
}

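/*
 * Check that the loaded kernel module and this QEMU speak compatible
 * API versions: the module must provide at least hax_min_version, and
 * our hax_cur_version must not be below the module's compat_version.
 * Returns 1 when compatible, 0 otherwise.
 */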
static int hax_version_support(struct hax_state *hax)
{
    int ret;
    struct hax_module_version version;

    ret = hax_mod_version(hax, &version);
    if (ret < 0) {
        return 0;
    }

    if (hax_min_version > version.cur_version) {
        fprintf(stderr, "Incompatible HAX module version %d, "
                "requires minimum version %d\n",
                version.cur_version, hax_min_version);
        return 0;
    }
    if (hax_cur_version < version.compat_version) {
        fprintf(stderr, "Incompatible QEMU HAX API version %x, "
                "requires minimum HAX API version %x\n",
                hax_cur_version, version.compat_version);
        return 0;
    }

    return 1;
}

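/*
 * Create vcpu @id in the kernel module, open a handle to it, and set up
 * the shared communication channel (the hax_tunnel) through which the
 * driver reports exit information.
 */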
int hax_vcpu_create(int id)
{
    struct hax_vcpu_state *vcpu = NULL;
    int ret;

    if (!hax_global.vm) {
        fprintf(stderr, "Failed to create vcpu %x, vm is null\n", id);
        return -1;
    }

    if (hax_global.vm->vcpus[id]) {
        fprintf(stderr, "vcpu %x is already allocated\n", id);
        return 0;
    }

    vcpu = g_new0(struct hax_vcpu_state, 1);

    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    if (ret) {
        fprintf(stderr, "Failed to create vcpu %x\n", id);
        goto error;
    }

    vcpu->vcpu_id = id;
    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    if (hax_invalid_fd(vcpu->fd)) {
        fprintf(stderr, "Failed to open the vcpu\n");
        ret = -ENODEV;
        goto error;
    }

    hax_global.vm->vcpus[id] = vcpu;

    ret = hax_host_setup_vcpu_channel(vcpu);
    if (ret) {
        fprintf(stderr, "Invalid hax tunnel size\n");
        ret = -EINVAL;
        goto error;
    }
    return 0;

  error:
    /* vcpu and tunnel will be closed automatically */
    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
        hax_close_fd(vcpu->fd);
    }

    hax_global.vm->vcpus[id] = NULL;
    g_free(vcpu);
    return -1;
}

int hax_vcpu_destroy(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return 0;
    }

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
        return -1;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. Closing the fd makes the hax module clean up the vcpu
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    g_free(vcpu);
    return 0;
}

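/*
 * Per-CPU accelerator initialization, called once per vcpu. The vcpu
 * starts out marked dirty; the synchronization hooks below push QEMU's
 * register state into the kernel module before the first run.
 */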
int hax_init_vcpu(CPUState *cpu)
{
    int ret;

    ret = hax_vcpu_create(cpu->cpu_index);
    if (ret < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));

    return ret;
}

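/*
 * Create the (single) VM object, or return the one that already exists.
 * The vcpu pointer table is sized for max_cpus and filled in lazily by
 * hax_vcpu_create().
 */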
struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
{
    struct hax_vm *vm;
    int vm_id = 0, ret, i;

    if (hax_invalid_fd(hax->fd)) {
        return NULL;
    }

    if (hax->vm) {
        return hax->vm;
    }

    if (max_cpus > HAX_MAX_VCPU) {
        fprintf(stderr, "The maximum number of VCPUs QEMU supports is %d\n",
                HAX_MAX_VCPU);
        return NULL;
    }

    vm = g_new0(struct hax_vm, 1);

    ret = hax_host_create_vm(hax, &vm_id);
    if (ret) {
        fprintf(stderr, "Failed to create vm %x\n", ret);
        goto error;
    }
    vm->id = vm_id;
    vm->fd = hax_host_open_vm(hax, vm_id);
    if (hax_invalid_fd(vm->fd)) {
        fprintf(stderr, "Failed to open vm %d\n", vm_id);
        goto error;
    }

    vm->numvcpus = max_cpus;
    vm->vcpus = g_new0(struct hax_vcpu_state *, vm->numvcpus);
    for (i = 0; i < vm->numvcpus; i++) {
        vm->vcpus[i] = NULL;
    }

    hax->vm = vm;
    return vm;

  error:
    g_free(vm);
    hax->vm = NULL;
    return NULL;
}

int hax_vm_destroy(struct hax_vm *vm)
{
    int i;

    for (i = 0; i < vm->numvcpus; i++) {
        if (vm->vcpus[i]) {
            fprintf(stderr, "VCPUs must be destroyed before the vm\n");
            return -1;
        }
    }
    hax_close_fd(vm->fd);
    vm->numvcpus = 0;
    g_free(vm->vcpus);
    g_free(vm);
    hax_global.vm = NULL;
    return 0;
}

static void hax_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;

    if (!qemu_cpu_is_self(cpu)) {
        qemu_cpu_kick(cpu);
    }
}

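/*
 * One-time accelerator setup: open the HAX device, validate capabilities
 * and API versions, create the VM, set up guest memory handling, and
 * report QEMU's supported API range to the driver.
 */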
static int hax_init(ram_addr_t ram_size, int max_cpus)
{
    struct hax_state *hax = NULL;
    struct hax_qemu_version qversion;
    int ret;

    hax = &hax_global;

    memset(hax, 0, sizeof(struct hax_state));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);
    if (ret) {
        if (ret != -ENOSPC) {
            ret = -EINVAL;
        }
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax, max_cpus);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);
    cpu_interrupt_handler = hax_handle_interrupt;

    return ret;

  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}

static int hax_accel_init(MachineState *ms)
{
    int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);

    if (ret && (ret != -ENOSPC)) {
        fprintf(stderr, "No accelerator found.\n");
    } else {
        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
                !ret ? "working" : "not working",
                !ret ? "fast virt" : "emulation");
    }
    return ret;
}

static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    if (hft->direction < 2) {
        cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
                               hft->direction);
    } else {
        /*
         * HAX API v4 supports transferring data between two MMIO addresses,
         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
         *  hft->direction == 2: gpa ==> gpa2
         */
        uint64_t value;
        cpu_physical_memory_read(hft->gpa, &value, hft->size);
        cpu_physical_memory_write(hft->gpa2, &value, hft->size);
    }

    return 0;
}

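/*
 * Handle a port I/O exit. The data has already been staged in the vcpu's
 * I/O buffer by the kernel module; replay the accesses one element at a
 * time through address_space_io, walking the buffer forwards, or
 * backwards when the direction flag (df) is set, as string I/O with
 * DF=1 requires.
 */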
static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    uint8_t *ptr;
    int i;
    MemTxAttrs attrs = { 0 };

    if (!df) {
        ptr = (uint8_t *) buffer;
    } else {
        ptr = buffer + size * count - size;
    }
    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);
        if (!df) {
            ptr += size;
        } else {
            ptr -= size;
        }
    }

    return 0;
}

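/*
 * Try to inject a pending external interrupt before entering the guest;
 * if the guest cannot accept it yet, ask the driver for an interrupt
 * window exit so that injection can be retried as soon as it can.
 */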
static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    /*
     * Try to inject an interrupt if the guest can accept it.
     * Unlike KVM, the HAX kernel module checks the eflags itself,
     * instead of QEMU doing so.
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq;

        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        ht->request_interrupt_window = 1;
    } else {
        ht->request_interrupt_window = 0;
    }
    return 0;
}

void hax_raise_event(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return;
    }
    vcpu->tunnel->user_event_pending = 1;
}

/*
 * Ask the hax kernel module to run the CPU for us until:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. when the guest executes an MMIO
 *    instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to execute vcpu at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if the system call was interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                                ht->pio._direction, ht->pio._size,
                                ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* hlt instruction with interrupt disabled is shutdown */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* These exits simply resume execution in the hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}


static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_arch_get_registers(env);
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

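/*
 * Outer vcpu loop, called from the vcpu thread. Runs the guest until
 * hax_vcpu_hax_exec() leaves an exit reason to report upward (e.g.
 * EXCP_HLT or EXCP_INTERRUPT), and aborts on a fatal return.
 */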
int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
    int fatal;
    int ret;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = hax_vcpu_hax_exec(env);

        if (fatal) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }

    return ret;
}

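/*
 * Helpers converting segment registers between QEMU's SegmentCache
 * layout and the segment_desc_t layout used in the HAX vcpu state.
 */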
static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->operand_size = 0;
    lhs->desc = 1;
    lhs->long_mode = 0;
    lhs->granularity = 0;
    lhs->available = 0;
}

static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->long_mode << DESC_L_SHIFT)
        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
}

static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    lhs->desc = (flags & DESC_S_MASK) != 0;
    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    lhs->granularity = (flags & DESC_G_MASK) != 0;
    lhs->available = (flags & DESC_AVL_MASK) != 0;
}

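/*
 * Copy one general-purpose register between the HAX state and the QEMU
 * state; 'set' selects the direction (QEMU to HAX when non-zero).
 */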
static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    target_ulong reg = *hax_reg;

    if (set) {
        *hax_reg = *qemu_reg;
    } else {
        *qemu_reg = reg;
    }
}

/* The sregs have already been synced with the HAX kernel before this call */
static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    get_seg(&env->segs[R_CS], &sregs->_cs);
    get_seg(&env->segs[R_DS], &sregs->_ds);
    get_seg(&env->segs[R_ES], &sregs->_es);
    get_seg(&env->segs[R_FS], &sregs->_fs);
    get_seg(&env->segs[R_GS], &sregs->_gs);
    get_seg(&env->segs[R_SS], &sregs->_ss);

    get_seg(&env->tr, &sregs->_tr);
    get_seg(&env->ldt, &sregs->_ldt);
    env->idt.limit = sregs->_idt.limit;
    env->idt.base = sregs->_idt.base;
    env->gdt.limit = sregs->_gdt.limit;
    env->gdt.base = sregs->_gdt.base;
    return 0;
}

static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs->_cs, &env->segs[R_CS]);
        set_seg(&sregs->_ds, &env->segs[R_DS]);
        set_seg(&sregs->_es, &env->segs[R_ES]);
        set_seg(&sregs->_fs, &env->segs[R_FS]);
        set_seg(&sregs->_gs, &env->segs[R_GS]);
        set_seg(&sregs->_ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                                  (sregs->_cs.selector & 3);
            sregs->_ss.dpl = sregs->_ss.selector & 3;
        }
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);
    sregs->_idt.limit = env->idt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_gdt.base = env->gdt.base;
    return 0;
}

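/*
 * Transfer the main architectural state (GPRs, RIP/RFLAGS, control and
 * segment registers) in a single vcpu_state_t round trip; 'set' selects
 * QEMU to HAX, otherwise HAX to QEMU.
 */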
static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;
    int ret;
    memset(&regs, 0, sizeof(struct vcpu_state_t));

    if (!set) {
        ret = hax_sync_vcpu_state(env, &regs, 0);
        if (ret < 0) {
            return -1;
        }
    }

    /* general-purpose registers */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (set) {
        regs._cr0 = env->cr[0];
        regs._cr2 = env->cr[2];
        regs._cr3 = env->cr[3];
        regs._cr4 = env->cr[4];
        hax_set_segments(env, &regs);
    } else {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
    }

    if (set) {
        ret = hax_sync_vcpu_state(env, &regs, 1);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
                              uint64_t value)
{
    item->entry = index;
    item->value = value;
}

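/*
 * Read back the set of MSRs that HAX synchronizes (SYSENTER, TSC and,
 * on 64-bit targets, the syscall-related MSRs) into the CPU state.
 */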
static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        }
    }

    return 0;
}

static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int n = 0;

    memset(&md, 0, sizeof(struct hax_msr_data));
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    md.nr_msr = n;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}

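/*
 * Fetch the FPU/SSE state from the driver's fx_layout image and unpack
 * it into the QEMU CPU state.
 */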
static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i, ret;

    ret = hax_sync_fpu(env, &fpu, 0);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftw >> i) & 1);
    }
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;

    for (i = 0; i < 8; ++i) {
        fpu.ftw |= (!env->fptags[i]) << i;
    }

    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < 8; i++) {
        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    fpu.mxcsr = env->mxcsr;

    return hax_sync_fpu(env, &fpu, 1);
}

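/*
 * Pull the complete vcpu state (registers, FPU, MSRs) out of the kernel
 * module and recompute the derived hflags.
 */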
static int hax_arch_get_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}

static int hax_arch_set_registers(CPUArchState *env)
{
    int ret;
    ret = hax_sync_vcpu_register(env, 1);

    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu reg\n");
        return ret;
    }
    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to set FPU state\n");
        return ret;
    }
    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to set MSRs\n");
        return ret;
    }

    return 0;
}

static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (hax_enabled()) {
        if (modified) {
            hax_arch_set_registers(env);
        } else {
            hax_arch_get_registers(env);
        }
    }
}

/*
 * Much simpler than KVM, at least in the first stage, because:
 * we don't need to consider device pass-through or the framebuffer,
 * and we may even be able to drop the BIOS entirely.
 */
int hax_sync_vcpus(void)
{
    if (hax_enabled()) {
        CPUState *cpu;

        cpu = first_cpu;
        if (!cpu) {
            return 0;
        }

        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
            int ret;

            ret = hax_arch_set_registers(cpu->env_ptr);
            if (ret < 0) {
                return ret;
            }
        }
    }

    return 0;
}

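/*
 * System reset callback (registered in hax_init_vcpu): clear per-vcpu
 * tunnel flags so that stale events do not survive a reset.
 */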
void hax_reset_vcpu_state(void *opaque)
{
    CPUState *cpu;
    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
        cpu->hax_vcpu->tunnel->user_event_pending = 0;
        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
    }
}

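/* QOM glue: registers "hax" as an accelerator selectable with -accel hax. */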
static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HAX";
    ac->init_machine = hax_accel_init;
    ac->allowed = &hax_allowed;
}

static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};

static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

type_init(hax_type_init);