qemu/target/i386/hax/hax-all.c
/*
 * QEMU HAX support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * Copyright (c) 2011 Intel Corporation
 *  Written by:
 *  Jiang Yunhong<yunhong.jiang@intel.com>
 *  Xin Xiaohui<xiaohui.xin@intel.com>
 *  Zhang Xiantao<xiantao.zhang@intel.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

/*
 * HAX common code for both Windows and Darwin
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"

#include "qemu/accel.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"

#include "hax-accel-ops.h"

#define DEBUG_HAX 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* Current version */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

bool hax_allowed;

struct hax_state hax_global;

static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);

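/* The vcpu tunnel is usable only if the kernel reports a size at least as
 * large as the struct hax_tunnel this QEMU was built against. */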
int valid_hax_tunnel_size(uint16_t size)
{
    return size >= sizeof(struct hax_tunnel);
}

hax_fd hax_vcpu_get_fd(CPUArchState *env)
{
    struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
    if (!vcpu) {
        return HAX_INVALID_FD;
    }
    return vcpu->fd;
}

static int hax_get_capability(struct hax_state *hax)
{
    int ret;
    struct hax_capabilityinfo capinfo, *cap = &capinfo;

    ret = hax_capability(hax, cap);
    if (ret) {
        return ret;
    }

    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
            DPRINTF("VT-x is not enabled; the HAX driver will not work.\n");
        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
            DPRINTF("NX is not enabled; the HAX driver will not work.\n");
        }
        return -ENXIO;
    }

    if (!(cap->winfo & HAX_CAP_UG)) {
        fprintf(stderr, "UG mode is not supported by the hardware.\n");
        return -ENOTSUP;
    }

    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);

    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
        if (cap->mem_quota < hax->mem_quota) {
            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
            return -ENOSPC;
        }
    }
    return 0;
}

static int hax_version_support(struct hax_state *hax)
{
    int ret;
    struct hax_module_version version;

    ret = hax_mod_version(hax, &version);
    if (ret < 0) {
        return 0;
    }

    if (hax_min_version > version.cur_version) {
        fprintf(stderr, "Incompatible HAX module version %d, "
                "requires minimum version %d\n",
                version.cur_version, hax_min_version);
        return 0;
    }
    if (hax_cur_version < version.compat_version) {
        fprintf(stderr, "Incompatible QEMU HAX API version %x, "
                "requires minimum HAX API version %x\n",
                hax_cur_version, version.compat_version);
        return 0;
    }

    return 1;
}

int hax_vcpu_create(int id)
{
    struct hax_vcpu_state *vcpu = NULL;
    int ret;

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x creation failed, vm is null\n", id);
        return -1;
    }

    if (hax_global.vm->vcpus[id]) {
        fprintf(stderr, "vcpu %x allocated already\n", id);
        return 0;
    }

    vcpu = g_new0(struct hax_vcpu_state, 1);

    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    if (ret) {
        fprintf(stderr, "Failed to create vcpu %x\n", id);
        goto error;
    }

    vcpu->vcpu_id = id;
    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    if (hax_invalid_fd(vcpu->fd)) {
        fprintf(stderr, "Failed to open the vcpu\n");
        ret = -ENODEV;
        goto error;
    }

    hax_global.vm->vcpus[id] = vcpu;

    ret = hax_host_setup_vcpu_channel(vcpu);
    if (ret) {
        fprintf(stderr, "Invalid hax tunnel size\n");
        ret = -EINVAL;
        goto error;
    }
    return 0;

  error:
    /* vcpu and tunnel will be closed automatically */
    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
        hax_close_fd(vcpu->fd);
    }

    hax_global.vm->vcpus[id] = NULL;
    g_free(vcpu);
    return -1;
}

int hax_vcpu_destroy(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return 0;
    }

    if (!hax_global.vm) {
        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
        return -1;
    }

    /*
     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
     * 2. Closing the fd causes the HAX kernel module to clean up the vcpu
     */
    hax_close_fd(vcpu->fd);
    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    g_free(vcpu);
    return 0;
}

int hax_init_vcpu(CPUState *cpu)
{
    int ret;

    ret = hax_vcpu_create(cpu->cpu_index);
    if (ret < 0) {
        fprintf(stderr, "Failed to create HAX vcpu\n");
        exit(-1);
    }

    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    cpu->vcpu_dirty = true;
    qemu_register_reset(hax_reset_vcpu_state, cpu->env_ptr);

    return ret;
}

struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
{
    struct hax_vm *vm;
    int vm_id = 0, ret, i;

    if (hax_invalid_fd(hax->fd)) {
        return NULL;
    }

    if (hax->vm) {
        return hax->vm;
    }

    if (max_cpus > HAX_MAX_VCPU) {
        fprintf(stderr, "Maximum number of VCPUs supported by QEMU is %d\n",
                HAX_MAX_VCPU);
        return NULL;
    }

    vm = g_new0(struct hax_vm, 1);

    ret = hax_host_create_vm(hax, &vm_id);
    if (ret) {
        fprintf(stderr, "Failed to create vm %x\n", ret);
        goto error;
    }
    vm->id = vm_id;
    vm->fd = hax_host_open_vm(hax, vm_id);
    if (hax_invalid_fd(vm->fd)) {
        fprintf(stderr, "Failed to open vm %d\n", vm_id);
        goto error;
    }

    vm->numvcpus = max_cpus;
    vm->vcpus = g_new0(struct hax_vcpu_state *, vm->numvcpus);
    for (i = 0; i < vm->numvcpus; i++) {
        vm->vcpus[i] = NULL;
    }

    hax->vm = vm;
    return vm;

  error:
    g_free(vm);
    hax->vm = NULL;
    return NULL;
}

int hax_vm_destroy(struct hax_vm *vm)
{
    int i;

    for (i = 0; i < vm->numvcpus; i++) {
        if (vm->vcpus[i]) {
            fprintf(stderr, "All VCPUs must be destroyed before the VM\n");
            return -1;
        }
    }
    hax_close_fd(vm->fd);
    vm->numvcpus = 0;
    g_free(vm->vcpus);
    g_free(vm);
    hax_global.vm = NULL;
    return 0;
}

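/*
 * One-time accelerator bring-up: open the HAX device, validate its
 * capabilities and API version, create the VM, initialize the guest
 * memory mapping, and tell the kernel module which API versions QEMU
 * speaks.
 */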
static int hax_init(ram_addr_t ram_size, int max_cpus)
{
    struct hax_state *hax = NULL;
    struct hax_qemu_version qversion;
    int ret;

    hax = &hax_global;

    memset(hax, 0, sizeof(struct hax_state));
    hax->mem_quota = ram_size;

    hax->fd = hax_mod_open();
    if (hax_invalid_fd(hax->fd)) {
        hax->fd = 0;
        ret = -ENODEV;
        goto error;
    }

    ret = hax_get_capability(hax);

    if (ret) {
        if (ret != -ENOSPC) {
            ret = -EINVAL;
        }
        goto error;
    }

    if (!hax_version_support(hax)) {
        ret = -EINVAL;
        goto error;
    }

    hax->vm = hax_vm_create(hax, max_cpus);
    if (!hax->vm) {
        fprintf(stderr, "Failed to create HAX VM\n");
        ret = -EINVAL;
        goto error;
    }

    hax_memory_init();

    qversion.cur_version = hax_cur_version;
    qversion.min_version = hax_min_version;
    hax_notify_qemu_version(hax->vm->fd, &qversion);

    return ret;
  error:
    if (hax->vm) {
        hax_vm_destroy(hax->vm);
    }
    if (hax->fd) {
        hax_mod_close(hax);
    }

    return ret;
}

static int hax_accel_init(MachineState *ms)
{
    int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);

    if (ret && (ret != -ENOSPC)) {
        fprintf(stderr, "No accelerator found.\n");
    } else {
        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
                !ret ? "working" : "not working",
                !ret ? "fast virt" : "emulation");
    }
    return ret;
}

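/*
 * Complete a fast-path MMIO access decoded by the kernel module:
 * direction 0 reads hft->size bytes at hft->gpa into hft->value,
 * direction 1 writes hft->value to hft->gpa, and direction 2 copies
 * between two MMIO addresses (see below).
 */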
static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
{
    if (hft->direction < 2) {
        cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
                               hft->direction);
    } else {
        /*
         * HAX API v4 supports transferring data between two MMIO addresses,
         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
         *  hft->direction == 2: gpa ==> gpa2
         */
        uint64_t value;
        cpu_physical_memory_read(hft->gpa, &value, hft->size);
        cpu_physical_memory_write(hft->gpa2, &value, hft->size);
    }

    return 0;
}

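/*
 * Emulate a (possibly repeated) port I/O access on behalf of the guest.
 * df is the direction flag: when set, string instructions such as
 * INS/OUTS walk the buffer from the highest element down.
 */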
static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
                         int direction, int size, int count, void *buffer)
{
    uint8_t *ptr;
    int i;
    MemTxAttrs attrs = { 0 };

    if (!df) {
        ptr = (uint8_t *) buffer;
    } else {
        ptr = buffer + size * count - size;
    }
    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);
        if (!df) {
            ptr += size;
        } else {
            ptr -= size;
        }
    }

    return 0;
}

static int hax_vcpu_interrupt(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    /*
     * Try to inject an interrupt if the guest can accept it.
     * Unlike KVM, the HAX kernel module checks EFLAGS.IF itself,
     * so QEMU does not have to.
     */
    if (ht->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        int irq;

        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            hax_inject_interrupt(env, irq);
            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
        ht->request_interrupt_window = 1;
    } else {
        ht->request_interrupt_window = 0;
    }
    return 0;
}

void hax_raise_event(CPUState *cpu)
{
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;

    if (!vcpu) {
        return;
    }
    vcpu->tunnel->user_event_pending = 1;
}

/*
 * Ask the HAX kernel module to run the vCPU until:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to execute vcpu at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply retry hax_vcpu_run() if the system call was interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                                ht->pio._direction,
                                ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* Set IF_MASK unconditionally so that the unhalt check above
                 * cannot miss a maskable interrupt (see the long comment on
                 * env->eflags earlier in this function). */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* Nothing to do for these exits; loop back into the HAX module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}

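/*
 * run_on_cpu() callbacks that keep QEMU's copy of the vcpu state in sync
 * with the HAX kernel module. vcpu_dirty marks QEMU's copy as the
 * up-to-date one.
 */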
static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_arch_get_registers(env);
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->vcpu_dirty) {
        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
                                              run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    CPUArchState *env = cpu->env_ptr;

    hax_vcpu_sync_state(env, 1);
    cpu->vcpu_dirty = false;
}

void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}

static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}

void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}

int hax_smp_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    int fatal;
    int ret;

    while (1) {
        if (cpu->exception_index >= EXCP_INTERRUPT) {
            ret = cpu->exception_index;
            cpu->exception_index = -1;
            break;
        }

        fatal = hax_vcpu_hax_exec(env);

        if (fatal) {
            fprintf(stderr, "Unsupported HAX vcpu return\n");
            abort();
        }
    }

    return ret;
}

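/*
 * In virtual-8086 mode every segment has fixed attributes (type 3, DPL 3,
 * present, 16-bit), so only selector, base and limit are taken from QEMU's
 * segment cache.
 */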
static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->operand_size = 0;
    lhs->desc = 1;
    lhs->long_mode = 0;
    lhs->granularity = 0;
    lhs->available = 0;
}

static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->operand_size << DESC_B_SHIFT)
        | (rhs->desc * DESC_S_MASK)
        | (rhs->long_mode << DESC_L_SHIFT)
        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
}

static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    memset(lhs, 0, sizeof(struct segment_desc_t));
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    lhs->desc = (flags & DESC_S_MASK) != 0;
    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    lhs->granularity = (flags & DESC_G_MASK) != 0;
    lhs->available = (flags & DESC_AVL_MASK) != 0;
}

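/* Copy one register value between a HAX state field and QEMU's CPU state;
 * 'set' selects the direction (non-zero: QEMU -> HAX). */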
static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
{
    target_ulong reg = *hax_reg;

    if (set) {
        *hax_reg = *qemu_reg;
    } else {
        *qemu_reg = reg;
    }
}

/* The sregs have already been synced with the HAX kernel before this call */
static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    get_seg(&env->segs[R_CS], &sregs->_cs);
    get_seg(&env->segs[R_DS], &sregs->_ds);
    get_seg(&env->segs[R_ES], &sregs->_es);
    get_seg(&env->segs[R_FS], &sregs->_fs);
    get_seg(&env->segs[R_GS], &sregs->_gs);
    get_seg(&env->segs[R_SS], &sregs->_ss);

    get_seg(&env->tr, &sregs->_tr);
    get_seg(&env->ldt, &sregs->_ldt);
    env->idt.limit = sregs->_idt.limit;
    env->idt.base = sregs->_idt.base;
    env->gdt.limit = sregs->_gdt.limit;
    env->gdt.base = sregs->_gdt.base;
    return 0;
}

static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
{
    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs->_cs, &env->segs[R_CS]);
        set_seg(&sregs->_ds, &env->segs[R_DS]);
        set_seg(&sregs->_es, &env->segs[R_ES]);
        set_seg(&sregs->_fs, &env->segs[R_FS]);
        set_seg(&sregs->_gs, &env->segs[R_GS]);
        set_seg(&sregs->_ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
                                  (sregs->_cs.selector & 3);
            sregs->_ss.dpl = sregs->_ss.selector & 3;
        }
    }

    set_seg(&sregs->_tr, &env->tr);
    set_seg(&sregs->_ldt, &env->ldt);
    sregs->_idt.limit = env->idt.limit;
    sregs->_idt.base = env->idt.base;
    sregs->_gdt.limit = env->gdt.limit;
    sregs->_gdt.base = env->gdt.base;
    return 0;
}

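/*
 * Transfer the general-purpose, flag, control and segment registers
 * between QEMU and the HAX kernel in one vcpu_state_t; 'set' selects
 * the direction (non-zero: QEMU -> HAX).
 */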
static int hax_sync_vcpu_register(CPUArchState *env, int set)
{
    struct vcpu_state_t regs;
    int ret;
    memset(&regs, 0, sizeof(struct vcpu_state_t));

    if (!set) {
        ret = hax_sync_vcpu_state(env, &regs, 0);
        if (ret < 0) {
            return -1;
        }
    }

    /* generic register */
    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    hax_getput_reg(&regs._r8, &env->regs[8], set);
    hax_getput_reg(&regs._r9, &env->regs[9], set);
    hax_getput_reg(&regs._r10, &env->regs[10], set);
    hax_getput_reg(&regs._r11, &env->regs[11], set);
    hax_getput_reg(&regs._r12, &env->regs[12], set);
    hax_getput_reg(&regs._r13, &env->regs[13], set);
    hax_getput_reg(&regs._r14, &env->regs[14], set);
    hax_getput_reg(&regs._r15, &env->regs[15], set);
#endif
    hax_getput_reg(&regs._rflags, &env->eflags, set);
    hax_getput_reg(&regs._rip, &env->eip, set);

    if (set) {
        regs._cr0 = env->cr[0];
        regs._cr2 = env->cr[2];
        regs._cr3 = env->cr[3];
        regs._cr4 = env->cr[4];
        hax_set_segments(env, &regs);
    } else {
        env->cr[0] = regs._cr0;
        env->cr[2] = regs._cr2;
        env->cr[3] = regs._cr3;
        env->cr[4] = regs._cr4;
        hax_get_segments(env, &regs);
    }

    if (set) {
        ret = hax_sync_vcpu_state(env, &regs, 1);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}

static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
                              uint64_t value)
{
    item->entry = index;
    item->value = value;
}

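/* Read back the MSRs QEMU tracks; md.done reports how many entries the
 * kernel actually filled in. */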
static int hax_get_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    msrs[n++].entry = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    msrs[n++].entry = MSR_EFER;
    msrs[n++].entry = MSR_STAR;
    msrs[n++].entry = MSR_LSTAR;
    msrs[n++].entry = MSR_CSTAR;
    msrs[n++].entry = MSR_FMASK;
    msrs[n++].entry = MSR_KERNELGSBASE;
#endif
    md.nr_msr = n;
    ret = hax_sync_msr(env, &md, 0);
    if (ret < 0) {
        return ret;
    }

    for (i = 0; i < md.done; i++) {
        switch (msrs[i].entry) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].value;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].value;
            break;
        case MSR_IA32_TSC:
            env->tsc = msrs[i].value;
            break;
#ifdef TARGET_X86_64
        case MSR_EFER:
            env->efer = msrs[i].value;
            break;
        case MSR_STAR:
            env->star = msrs[i].value;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].value;
            break;
        case MSR_CSTAR:
            env->cstar = msrs[i].value;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].value;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].value;
            break;
#endif
        }
    }

    return 0;
}

static int hax_set_msrs(CPUArchState *env)
{
    struct hax_msr_data md;
    struct vmx_msr *msrs = md.entries;
    int n = 0;

    memset(&md, 0, sizeof(struct hax_msr_data));
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
#endif
    md.nr_msr = n;
    md.done = 0;

    return hax_sync_msr(env, &md, 1);
}

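/*
 * struct fx_layout mirrors the FXSAVE area: bits 13:11 of the FPU status
 * word hold the stack top, and the tag word is the abridged FTW (one bit
 * per register, 1 = valid), hence the inversion into env->fptags.
 */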
static int hax_get_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i, ret;

    ret = hax_sync_fpu(env, &fpu, 0);
    if (ret < 0) {
        return ret;
    }

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i) {
        env->fptags[i] = !((fpu.ftw >> i) & 1);
    }
    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));

    for (i = 0; i < 8; i++) {
        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
        if (CPU_NB_REGS > 8) {
            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
        }
    }
    env->mxcsr = fpu.mxcsr;

    return 0;
}

static int hax_set_fpu(CPUArchState *env)
{
    struct fx_layout fpu;
    int i;

    memset(&fpu, 0, sizeof(fpu));
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;

    for (i = 0; i < 8; ++i) {
        fpu.ftw |= (!env->fptags[i]) << i;
    }

    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
    for (i = 0; i < 8; i++) {
        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
        if (CPU_NB_REGS > 8) {
            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
        }
    }

    fpu.mxcsr = env->mxcsr;

    return hax_sync_fpu(env, &fpu, 1);
}

static int hax_arch_get_registers(CPUArchState *env)
{
    int ret;

    ret = hax_sync_vcpu_register(env, 0);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_fpu(env);
    if (ret < 0) {
        return ret;
    }

    ret = hax_get_msrs(env);
    if (ret < 0) {
        return ret;
    }

    x86_update_hflags(env);
    return 0;
}

static int hax_arch_set_registers(CPUArchState *env)
{
    int ret;
    ret = hax_sync_vcpu_register(env, 1);

    if (ret < 0) {
        fprintf(stderr, "Failed to sync vcpu registers\n");
        return ret;
    }
    ret = hax_set_fpu(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to set FPU state\n");
        return ret;
    }
    ret = hax_set_msrs(env);
    if (ret < 0) {
        fprintf(stderr, "Failed to set MSRs\n");
        return ret;
    }

    return 0;
}

static void hax_vcpu_sync_state(CPUArchState *env, int modified)
{
    if (hax_enabled()) {
        if (modified) {
            hax_arch_set_registers(env);
        } else {
            hax_arch_get_registers(env);
        }
    }
}

/*
 * Much simpler than KVM, at least in the first stage, because we don't
 * need to consider device pass-through or the framebuffer, and we may
 * even be able to drop the BIOS entirely.
 */
int hax_sync_vcpus(void)
{
    if (hax_enabled()) {
        CPUState *cpu;

        cpu = first_cpu;
        if (!cpu) {
            return 0;
        }

        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
            int ret;

            ret = hax_arch_set_registers(cpu->env_ptr);
            if (ret < 0) {
                return ret;
            }
        }
    }

    return 0;
}

void hax_reset_vcpu_state(void *opaque)
{
    CPUState *cpu;
    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
        cpu->hax_vcpu->tunnel->user_event_pending = 0;
        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
    }
}

static void hax_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "HAX";
    ac->init_machine = hax_accel_init;
    ac->allowed = &hax_allowed;
}

static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};

static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

type_init(hax_type_init);