qemu/target/i386/hax-all.c
<<
>>
Prefs
   1/*
   2 * QEMU HAX support
   3 *
   4 * Copyright IBM, Corp. 2008
   5 *           Red Hat, Inc. 2008
   6 *
   7 * Authors:
   8 *  Anthony Liguori   <aliguori@us.ibm.com>
   9 *  Glauber Costa     <gcosta@redhat.com>
  10 *
  11 * Copyright (c) 2011 Intel Corporation
  12 *  Written by:
  13 *  Jiang Yunhong<yunhong.jiang@intel.com>
  14 *  Xin Xiaohui<xiaohui.xin@intel.com>
  15 *  Zhang Xiantao<xiantao.zhang@intel.com>
  16 *
  17 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18 * See the COPYING file in the top-level directory.
  19 *
  20 */
  21
  22/*
  23 * HAX common code for both windows and darwin
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "cpu.h"
  28#include "exec/address-spaces.h"
  29#include "exec/exec-all.h"
  30#include "exec/ioport.h"
  31
  32#include "qemu-common.h"
  33#include "hax-i386.h"
  34#include "sysemu/accel.h"
  35#include "sysemu/sysemu.h"
  36#include "qemu/main-loop.h"
  37#include "hw/boards.h"
  38
  39#define DEBUG_HAX 0
  40
  41#define DPRINTF(fmt, ...) \
  42    do { \
  43        if (DEBUG_HAX) { \
  44            fprintf(stdout, fmt, ## __VA_ARGS__); \
  45        } \
  46    } while (0)
  47
  48/* Current version */
  49const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
  50/* Minimum HAX kernel version */
  51const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
  52
  53static bool hax_allowed;
  54
  55struct hax_state hax_global;
  56
  57static void hax_vcpu_sync_state(CPUArchState *env, int modified);
  58static int hax_arch_get_registers(CPUArchState *env);
  59
  60int hax_enabled(void)
  61{
  62    return hax_allowed;
  63}
  64
  65int valid_hax_tunnel_size(uint16_t size)
  66{
  67    return size >= sizeof(struct hax_tunnel);
  68}
  69
  70hax_fd hax_vcpu_get_fd(CPUArchState *env)
  71{
  72    struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
  73    if (!vcpu) {
  74        return HAX_INVALID_FD;
  75    }
  76    return vcpu->fd;
  77}
  78
  79static int hax_get_capability(struct hax_state *hax)
  80{
  81    int ret;
  82    struct hax_capabilityinfo capinfo, *cap = &capinfo;
  83
  84    ret = hax_capability(hax, cap);
  85    if (ret) {
  86        return ret;
  87    }
  88
  89    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  90        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  91            DPRINTF
  92                ("VTX feature is not enabled, HAX driver will not work.\n");
  93        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  94            DPRINTF
  95                ("NX feature is not enabled, HAX driver will not work.\n");
  96        }
  97        return -ENXIO;
  98
  99    }
 100
 101    if (!(cap->winfo & HAX_CAP_UG)) {
 102        fprintf(stderr, "UG mode is not supported by the hardware.\n");
 103        return -ENOTSUP;
 104    }
 105
 106    hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
 107
 108    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 109        if (cap->mem_quota < hax->mem_quota) {
 110            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 111            return -ENOSPC;
 112        }
 113    }
 114    return 0;
 115}
 116
 117static int hax_version_support(struct hax_state *hax)
 118{
 119    int ret;
 120    struct hax_module_version version;
 121
 122    ret = hax_mod_version(hax, &version);
 123    if (ret < 0) {
 124        return 0;
 125    }
 126
 127    if (hax_min_version > version.cur_version) {
 128        fprintf(stderr, "Incompatible HAX module version %d,",
 129                version.cur_version);
 130        fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 131        return 0;
 132    }
 133    if (hax_cur_version < version.compat_version) {
 134        fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 135                hax_cur_version);
 136        fprintf(stderr, "requires minimum HAX API version %x\n",
 137                version.compat_version);
 138        return 0;
 139    }
 140
 141    return 1;
 142}
 143
 144int hax_vcpu_create(int id)
 145{
 146    struct hax_vcpu_state *vcpu = NULL;
 147    int ret;
 148
 149    if (!hax_global.vm) {
 150        fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 151        return -1;
 152    }
 153
 154    if (hax_global.vm->vcpus[id]) {
 155        fprintf(stderr, "vcpu %x allocated already\n", id);
 156        return 0;
 157    }
 158
 159    vcpu = g_malloc(sizeof(struct hax_vcpu_state));
 160    if (!vcpu) {
 161        fprintf(stderr, "Failed to alloc vcpu state\n");
 162        return -ENOMEM;
 163    }
 164
 165    memset(vcpu, 0, sizeof(struct hax_vcpu_state));
 166
 167    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 168    if (ret) {
 169        fprintf(stderr, "Failed to create vcpu %x\n", id);
 170        goto error;
 171    }
 172
 173    vcpu->vcpu_id = id;
 174    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 175    if (hax_invalid_fd(vcpu->fd)) {
 176        fprintf(stderr, "Failed to open the vcpu\n");
 177        ret = -ENODEV;
 178        goto error;
 179    }
 180
 181    hax_global.vm->vcpus[id] = vcpu;
 182
 183    ret = hax_host_setup_vcpu_channel(vcpu);
 184    if (ret) {
 185        fprintf(stderr, "Invalid hax tunnel size\n");
 186        ret = -EINVAL;
 187        goto error;
 188    }
 189    return 0;
 190
 191  error:
 192    /* vcpu and tunnel will be closed automatically */
 193    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 194        hax_close_fd(vcpu->fd);
 195    }
 196
 197    hax_global.vm->vcpus[id] = NULL;
 198    g_free(vcpu);
 199    return -1;
 200}
 201
 202int hax_vcpu_destroy(CPUState *cpu)
 203{
 204    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 205
 206    if (!hax_global.vm) {
 207        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 208        return -1;
 209    }
 210
 211    if (!vcpu) {
 212        return 0;
 213    }
 214
 215    /*
 216     * 1. The hax_tunnel is also destroied when vcpu destroy
 217     * 2. close fd will cause hax module vcpu be cleaned
 218     */
 219    hax_close_fd(vcpu->fd);
 220    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 221    g_free(vcpu);
 222    return 0;
 223}
 224
 225int hax_init_vcpu(CPUState *cpu)
 226{
 227    int ret;
 228
 229    ret = hax_vcpu_create(cpu->cpu_index);
 230    if (ret < 0) {
 231        fprintf(stderr, "Failed to create HAX vcpu\n");
 232        exit(-1);
 233    }
 234
 235    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 236    cpu->vcpu_dirty = true;
 237    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 238
 239    return ret;
 240}
 241
 242struct hax_vm *hax_vm_create(struct hax_state *hax)
 243{
 244    struct hax_vm *vm;
 245    int vm_id = 0, ret;
 246
 247    if (hax_invalid_fd(hax->fd)) {
 248        return NULL;
 249    }
 250
 251    if (hax->vm) {
 252        return hax->vm;
 253    }
 254
 255    vm = g_malloc(sizeof(struct hax_vm));
 256    if (!vm) {
 257        return NULL;
 258    }
 259    memset(vm, 0, sizeof(struct hax_vm));
 260    ret = hax_host_create_vm(hax, &vm_id);
 261    if (ret) {
 262        fprintf(stderr, "Failed to create vm %x\n", ret);
 263        goto error;
 264    }
 265    vm->id = vm_id;
 266    vm->fd = hax_host_open_vm(hax, vm_id);
 267    if (hax_invalid_fd(vm->fd)) {
 268        fprintf(stderr, "Failed to open vm %d\n", vm_id);
 269        goto error;
 270    }
 271
 272    hax->vm = vm;
 273    return vm;
 274
 275  error:
 276    g_free(vm);
 277    hax->vm = NULL;
 278    return NULL;
 279}
 280
 281int hax_vm_destroy(struct hax_vm *vm)
 282{
 283    int i;
 284
 285    for (i = 0; i < HAX_MAX_VCPU; i++)
 286        if (vm->vcpus[i]) {
 287            fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 288            return -1;
 289        }
 290    hax_close_fd(vm->fd);
 291    g_free(vm);
 292    hax_global.vm = NULL;
 293    return 0;
 294}
 295
 296static void hax_handle_interrupt(CPUState *cpu, int mask)
 297{
 298    cpu->interrupt_request |= mask;
 299
 300    if (!qemu_cpu_is_self(cpu)) {
 301        qemu_cpu_kick(cpu);
 302    }
 303}
 304
 305static int hax_init(ram_addr_t ram_size)
 306{
 307    struct hax_state *hax = NULL;
 308    struct hax_qemu_version qversion;
 309    int ret;
 310
 311    hax = &hax_global;
 312
 313    memset(hax, 0, sizeof(struct hax_state));
 314    hax->mem_quota = ram_size;
 315
 316    hax->fd = hax_mod_open();
 317    if (hax_invalid_fd(hax->fd)) {
 318        hax->fd = 0;
 319        ret = -ENODEV;
 320        goto error;
 321    }
 322
 323    ret = hax_get_capability(hax);
 324
 325    if (ret) {
 326        if (ret != -ENOSPC) {
 327            ret = -EINVAL;
 328        }
 329        goto error;
 330    }
 331
 332    if (!hax_version_support(hax)) {
 333        ret = -EINVAL;
 334        goto error;
 335    }
 336
 337    hax->vm = hax_vm_create(hax);
 338    if (!hax->vm) {
 339        fprintf(stderr, "Failed to create HAX VM\n");
 340        ret = -EINVAL;
 341        goto error;
 342    }
 343
 344    hax_memory_init();
 345
 346    qversion.cur_version = hax_cur_version;
 347    qversion.min_version = hax_min_version;
 348    hax_notify_qemu_version(hax->vm->fd, &qversion);
 349    cpu_interrupt_handler = hax_handle_interrupt;
 350
 351    return ret;
 352  error:
 353    if (hax->vm) {
 354        hax_vm_destroy(hax->vm);
 355    }
 356    if (hax->fd) {
 357        hax_mod_close(hax);
 358    }
 359
 360    return ret;
 361}
 362
 363static int hax_accel_init(MachineState *ms)
 364{
 365    int ret = hax_init(ms->ram_size);
 366
 367    if (ret && (ret != -ENOSPC)) {
 368        fprintf(stderr, "No accelerator found.\n");
 369    } else {
 370        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 371                !ret ? "working" : "not working",
 372                !ret ? "fast virt" : "emulation");
 373    }
 374    return ret;
 375}
 376
 377static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 378{
 379    if (hft->direction < 2) {
 380        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
 381                               hft->direction);
 382    } else {
 383        /*
 384         * HAX API v4 supports transferring data between two MMIO addresses,
 385         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 386         *  hft->direction == 2: gpa ==> gpa2
 387         */
 388        uint64_t value;
 389        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
 390        cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
 391    }
 392
 393    return 0;
 394}
 395
 396static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 397                         int direction, int size, int count, void *buffer)
 398{
 399    uint8_t *ptr;
 400    int i;
 401    MemTxAttrs attrs = { 0 };
 402
 403    if (!df) {
 404        ptr = (uint8_t *) buffer;
 405    } else {
 406        ptr = buffer + size * count - size;
 407    }
 408    for (i = 0; i < count; i++) {
 409        address_space_rw(&address_space_io, port, attrs,
 410                         ptr, size, direction == HAX_EXIT_IO_OUT);
 411        if (!df) {
 412            ptr += size;
 413        } else {
 414            ptr -= size;
 415        }
 416    }
 417
 418    return 0;
 419}
 420
 421static int hax_vcpu_interrupt(CPUArchState *env)
 422{
 423    CPUState *cpu = ENV_GET_CPU(env);
 424    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 425    struct hax_tunnel *ht = vcpu->tunnel;
 426
 427    /*
 428     * Try to inject an interrupt if the guest can accept it
 429     * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 430     */
 431    if (ht->ready_for_interrupt_injection &&
 432        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 433        int irq;
 434
 435        irq = cpu_get_pic_interrupt(env);
 436        if (irq >= 0) {
 437            hax_inject_interrupt(env, irq);
 438            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 439        }
 440    }
 441
 442    /* If we have an interrupt but the guest is not ready to receive an
 443     * interrupt, request an interrupt window exit.  This will
 444     * cause a return to userspace as soon as the guest is ready to
 445     * receive interrupts. */
 446    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 447        ht->request_interrupt_window = 1;
 448    } else {
 449        ht->request_interrupt_window = 0;
 450    }
 451    return 0;
 452}
 453
 454void hax_raise_event(CPUState *cpu)
 455{
 456    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 457
 458    if (!vcpu) {
 459        return;
 460    }
 461    vcpu->tunnel->user_event_pending = 1;
 462}
 463
 464/*
 465 * Ask hax kernel module to run the CPU for us till:
 466 * 1. Guest crash or shutdown
 467 * 2. Need QEMU's emulation like guest execute MMIO instruction
 468 * 3. Guest execute HLT
 469 * 4. QEMU have Signal/event pending
 470 * 5. An unknown VMX exit happens
 471 */
 472static int hax_vcpu_hax_exec(CPUArchState *env)
 473{
 474    int ret = 0;
 475    CPUState *cpu = ENV_GET_CPU(env);
 476    X86CPU *x86_cpu = X86_CPU(cpu);
 477    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 478    struct hax_tunnel *ht = vcpu->tunnel;
 479
 480    if (!hax_enabled()) {
 481        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
 482        return 0;
 483    }
 484
 485    cpu->halted = 0;
 486
 487    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
 488        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
 489        apic_poll_irq(x86_cpu->apic_state);
 490    }
 491
 492    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
 493        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
 494                cpu->cpu_index);
 495        do_cpu_init(x86_cpu);
 496        hax_vcpu_sync_state(env, 1);
 497    }
 498
 499    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
 500        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
 501                cpu->cpu_index);
 502        hax_vcpu_sync_state(env, 0);
 503        do_cpu_sipi(x86_cpu);
 504        hax_vcpu_sync_state(env, 1);
 505    }
 506
 507    do {
 508        int hax_ret;
 509
 510        if (cpu->exit_request) {
 511            ret = 1;
 512            break;
 513        }
 514
 515        hax_vcpu_interrupt(env);
 516
 517        qemu_mutex_unlock_iothread();
 518        cpu_exec_start(cpu);
 519        hax_ret = hax_vcpu_run(vcpu);
 520        cpu_exec_end(cpu);
 521        qemu_mutex_lock_iothread();
 522
 523        /* Simply continue the vcpu_run if system call interrupted */
 524        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
 525            DPRINTF("io window interrupted\n");
 526            continue;
 527        }
 528
 529        if (hax_ret < 0) {
 530            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
 531            abort();
 532        }
 533        switch (ht->_exit_status) {
 534        case HAX_EXIT_IO:
 535            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
 536                            ht->pio._direction,
 537                            ht->pio._size, ht->pio._count, vcpu->iobuf);
 538            break;
 539        case HAX_EXIT_FAST_MMIO:
 540            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
 541            break;
 542        /* Guest state changed, currently only for shutdown */
 543        case HAX_EXIT_STATECHANGE:
 544            fprintf(stdout, "VCPU shutdown request\n");
 545            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 546            hax_vcpu_sync_state(env, 0);
 547            ret = 1;
 548            break;
 549        case HAX_EXIT_UNKNOWN_VMEXIT:
 550            fprintf(stderr, "Unknown VMX exit %x from guest\n",
 551                    ht->_exit_reason);
 552            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 553            hax_vcpu_sync_state(env, 0);
 554            cpu_dump_state(cpu, stderr, fprintf, 0);
 555            ret = -1;
 556            break;
 557        case HAX_EXIT_HLT:
 558            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
 559                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
 560                /* hlt instruction with interrupt disabled is shutdown */
 561                env->eflags |= IF_MASK;
 562                cpu->halted = 1;
 563                cpu->exception_index = EXCP_HLT;
 564                ret = 1;
 565            }
 566            break;
 567        /* these situations will continue to hax module */
 568        case HAX_EXIT_INTERRUPT:
 569        case HAX_EXIT_PAUSED:
 570            break;
 571        case HAX_EXIT_MMIO:
 572            /* Should not happen on UG system */
 573            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
 574            ret = -1;
 575            break;
 576        case HAX_EXIT_REAL:
 577            /* Should not happen on UG system */
 578            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
 579            ret = -1;
 580            break;
 581        default:
 582            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
 583            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 584            hax_vcpu_sync_state(env, 0);
 585            cpu_dump_state(cpu, stderr, fprintf, 0);
 586            ret = 1;
 587            break;
 588        }
 589    } while (!ret);
 590
 591    if (cpu->exit_request) {
 592        cpu->exit_request = 0;
 593        cpu->exception_index = EXCP_INTERRUPT;
 594    }
 595    return ret < 0;
 596}
 597
 598static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 599{
 600    CPUArchState *env = cpu->env_ptr;
 601
 602    hax_arch_get_registers(env);
 603    cpu->vcpu_dirty = true;
 604}
 605
 606void hax_cpu_synchronize_state(CPUState *cpu)
 607{
 608    if (!cpu->vcpu_dirty) {
 609        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 610    }
 611}
 612
 613static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 614                                              run_on_cpu_data arg)
 615{
 616    CPUArchState *env = cpu->env_ptr;
 617
 618    hax_vcpu_sync_state(env, 1);
 619    cpu->vcpu_dirty = false;
 620}
 621
 622void hax_cpu_synchronize_post_reset(CPUState *cpu)
 623{
 624    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 625}
 626
 627static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 628{
 629    CPUArchState *env = cpu->env_ptr;
 630
 631    hax_vcpu_sync_state(env, 1);
 632    cpu->vcpu_dirty = false;
 633}
 634
 635void hax_cpu_synchronize_post_init(CPUState *cpu)
 636{
 637    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 638}
 639
 640static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
 641{
 642    cpu->vcpu_dirty = true;
 643}
 644
 645void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
 646{
 647    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
 648}
 649
 650int hax_smp_cpu_exec(CPUState *cpu)
 651{
 652    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 653    int fatal;
 654    int ret;
 655
 656    while (1) {
 657        if (cpu->exception_index >= EXCP_INTERRUPT) {
 658            ret = cpu->exception_index;
 659            cpu->exception_index = -1;
 660            break;
 661        }
 662
 663        fatal = hax_vcpu_hax_exec(env);
 664
 665        if (fatal) {
 666            fprintf(stderr, "Unsupported HAX vcpu return\n");
 667            abort();
 668        }
 669    }
 670
 671    return ret;
 672}
 673
 674static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 675{
 676    memset(lhs, 0, sizeof(struct segment_desc_t));
 677    lhs->selector = rhs->selector;
 678    lhs->base = rhs->base;
 679    lhs->limit = rhs->limit;
 680    lhs->type = 3;
 681    lhs->present = 1;
 682    lhs->dpl = 3;
 683    lhs->operand_size = 0;
 684    lhs->desc = 1;
 685    lhs->long_mode = 0;
 686    lhs->granularity = 0;
 687    lhs->available = 0;
 688}
 689
 690static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 691{
 692    lhs->selector = rhs->selector;
 693    lhs->base = rhs->base;
 694    lhs->limit = rhs->limit;
 695    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 696        | (rhs->present * DESC_P_MASK)
 697        | (rhs->dpl << DESC_DPL_SHIFT)
 698        | (rhs->operand_size << DESC_B_SHIFT)
 699        | (rhs->desc * DESC_S_MASK)
 700        | (rhs->long_mode << DESC_L_SHIFT)
 701        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 702}
 703
 704static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 705{
 706    unsigned flags = rhs->flags;
 707
 708    memset(lhs, 0, sizeof(struct segment_desc_t));
 709    lhs->selector = rhs->selector;
 710    lhs->base = rhs->base;
 711    lhs->limit = rhs->limit;
 712    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 713    lhs->present = (flags & DESC_P_MASK) != 0;
 714    lhs->dpl = rhs->selector & 3;
 715    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 716    lhs->desc = (flags & DESC_S_MASK) != 0;
 717    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 718    lhs->granularity = (flags & DESC_G_MASK) != 0;
 719    lhs->available = (flags & DESC_AVL_MASK) != 0;
 720}
 721
 722static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 723{
 724    target_ulong reg = *hax_reg;
 725
 726    if (set) {
 727        *hax_reg = *qemu_reg;
 728    } else {
 729        *qemu_reg = reg;
 730    }
 731}
 732
 733/* The sregs has been synced with HAX kernel already before this call */
 734static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 735{
 736    get_seg(&env->segs[R_CS], &sregs->_cs);
 737    get_seg(&env->segs[R_DS], &sregs->_ds);
 738    get_seg(&env->segs[R_ES], &sregs->_es);
 739    get_seg(&env->segs[R_FS], &sregs->_fs);
 740    get_seg(&env->segs[R_GS], &sregs->_gs);
 741    get_seg(&env->segs[R_SS], &sregs->_ss);
 742
 743    get_seg(&env->tr, &sregs->_tr);
 744    get_seg(&env->ldt, &sregs->_ldt);
 745    env->idt.limit = sregs->_idt.limit;
 746    env->idt.base = sregs->_idt.base;
 747    env->gdt.limit = sregs->_gdt.limit;
 748    env->gdt.base = sregs->_gdt.base;
 749    return 0;
 750}
 751
 752static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 753{
 754    if ((env->eflags & VM_MASK)) {
 755        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 756        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 757        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 758        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 759        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 760        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 761    } else {
 762        set_seg(&sregs->_cs, &env->segs[R_CS]);
 763        set_seg(&sregs->_ds, &env->segs[R_DS]);
 764        set_seg(&sregs->_es, &env->segs[R_ES]);
 765        set_seg(&sregs->_fs, &env->segs[R_FS]);
 766        set_seg(&sregs->_gs, &env->segs[R_GS]);
 767        set_seg(&sregs->_ss, &env->segs[R_SS]);
 768
 769        if (env->cr[0] & CR0_PE_MASK) {
 770            /* force ss cpl to cs cpl */
 771            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 772                                  (sregs->_cs.selector & 3);
 773            sregs->_ss.dpl = sregs->_ss.selector & 3;
 774        }
 775    }
 776
 777    set_seg(&sregs->_tr, &env->tr);
 778    set_seg(&sregs->_ldt, &env->ldt);
 779    sregs->_idt.limit = env->idt.limit;
 780    sregs->_idt.base = env->idt.base;
 781    sregs->_gdt.limit = env->gdt.limit;
 782    sregs->_gdt.base = env->gdt.base;
 783    return 0;
 784}
 785
 786static int hax_sync_vcpu_register(CPUArchState *env, int set)
 787{
 788    struct vcpu_state_t regs;
 789    int ret;
 790    memset(&regs, 0, sizeof(struct vcpu_state_t));
 791
 792    if (!set) {
 793        ret = hax_sync_vcpu_state(env, &regs, 0);
 794        if (ret < 0) {
 795            return -1;
 796        }
 797    }
 798
 799    /* generic register */
 800    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 801    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 802    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 803    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 804    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 805    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 806    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 807    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 808#ifdef TARGET_X86_64
 809    hax_getput_reg(&regs._r8, &env->regs[8], set);
 810    hax_getput_reg(&regs._r9, &env->regs[9], set);
 811    hax_getput_reg(&regs._r10, &env->regs[10], set);
 812    hax_getput_reg(&regs._r11, &env->regs[11], set);
 813    hax_getput_reg(&regs._r12, &env->regs[12], set);
 814    hax_getput_reg(&regs._r13, &env->regs[13], set);
 815    hax_getput_reg(&regs._r14, &env->regs[14], set);
 816    hax_getput_reg(&regs._r15, &env->regs[15], set);
 817#endif
 818    hax_getput_reg(&regs._rflags, &env->eflags, set);
 819    hax_getput_reg(&regs._rip, &env->eip, set);
 820
 821    if (set) {
 822        regs._cr0 = env->cr[0];
 823        regs._cr2 = env->cr[2];
 824        regs._cr3 = env->cr[3];
 825        regs._cr4 = env->cr[4];
 826        hax_set_segments(env, &regs);
 827    } else {
 828        env->cr[0] = regs._cr0;
 829        env->cr[2] = regs._cr2;
 830        env->cr[3] = regs._cr3;
 831        env->cr[4] = regs._cr4;
 832        hax_get_segments(env, &regs);
 833    }
 834
 835    if (set) {
 836        ret = hax_sync_vcpu_state(env, &regs, 1);
 837        if (ret < 0) {
 838            return -1;
 839        }
 840    }
 841    return 0;
 842}
 843
 844static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 845                              uint64_t value)
 846{
 847    item->entry = index;
 848    item->value = value;
 849}
 850
 851static int hax_get_msrs(CPUArchState *env)
 852{
 853    struct hax_msr_data md;
 854    struct vmx_msr *msrs = md.entries;
 855    int ret, i, n;
 856
 857    n = 0;
 858    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 859    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 860    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 861    msrs[n++].entry = MSR_IA32_TSC;
 862#ifdef TARGET_X86_64
 863    msrs[n++].entry = MSR_EFER;
 864    msrs[n++].entry = MSR_STAR;
 865    msrs[n++].entry = MSR_LSTAR;
 866    msrs[n++].entry = MSR_CSTAR;
 867    msrs[n++].entry = MSR_FMASK;
 868    msrs[n++].entry = MSR_KERNELGSBASE;
 869#endif
 870    md.nr_msr = n;
 871    ret = hax_sync_msr(env, &md, 0);
 872    if (ret < 0) {
 873        return ret;
 874    }
 875
 876    for (i = 0; i < md.done; i++) {
 877        switch (msrs[i].entry) {
 878        case MSR_IA32_SYSENTER_CS:
 879            env->sysenter_cs = msrs[i].value;
 880            break;
 881        case MSR_IA32_SYSENTER_ESP:
 882            env->sysenter_esp = msrs[i].value;
 883            break;
 884        case MSR_IA32_SYSENTER_EIP:
 885            env->sysenter_eip = msrs[i].value;
 886            break;
 887        case MSR_IA32_TSC:
 888            env->tsc = msrs[i].value;
 889            break;
 890#ifdef TARGET_X86_64
 891        case MSR_EFER:
 892            env->efer = msrs[i].value;
 893            break;
 894        case MSR_STAR:
 895            env->star = msrs[i].value;
 896            break;
 897        case MSR_LSTAR:
 898            env->lstar = msrs[i].value;
 899            break;
 900        case MSR_CSTAR:
 901            env->cstar = msrs[i].value;
 902            break;
 903        case MSR_FMASK:
 904            env->fmask = msrs[i].value;
 905            break;
 906        case MSR_KERNELGSBASE:
 907            env->kernelgsbase = msrs[i].value;
 908            break;
 909#endif
 910        }
 911    }
 912
 913    return 0;
 914}
 915
 916static int hax_set_msrs(CPUArchState *env)
 917{
 918    struct hax_msr_data md;
 919    struct vmx_msr *msrs;
 920    msrs = md.entries;
 921    int n = 0;
 922
 923    memset(&md, 0, sizeof(struct hax_msr_data));
 924    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 925    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 926    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 927    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 928#ifdef TARGET_X86_64
 929    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 930    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 931    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 932    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 933    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 934    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 935#endif
 936    md.nr_msr = n;
 937    md.done = 0;
 938
 939    return hax_sync_msr(env, &md, 1);
 940}
 941
 942static int hax_get_fpu(CPUArchState *env)
 943{
 944    struct fx_layout fpu;
 945    int i, ret;
 946
 947    ret = hax_sync_fpu(env, &fpu, 0);
 948    if (ret < 0) {
 949        return ret;
 950    }
 951
 952    env->fpstt = (fpu.fsw >> 11) & 7;
 953    env->fpus = fpu.fsw;
 954    env->fpuc = fpu.fcw;
 955    for (i = 0; i < 8; ++i) {
 956        env->fptags[i] = !((fpu.ftw >> i) & 1);
 957    }
 958    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
 959
 960    for (i = 0; i < 8; i++) {
 961        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
 962        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
 963        if (CPU_NB_REGS > 8) {
 964            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
 965            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
 966        }
 967    }
 968    env->mxcsr = fpu.mxcsr;
 969
 970    return 0;
 971}
 972
 973static int hax_set_fpu(CPUArchState *env)
 974{
 975    struct fx_layout fpu;
 976    int i;
 977
 978    memset(&fpu, 0, sizeof(fpu));
 979    fpu.fsw = env->fpus & ~(7 << 11);
 980    fpu.fsw |= (env->fpstt & 7) << 11;
 981    fpu.fcw = env->fpuc;
 982
 983    for (i = 0; i < 8; ++i) {
 984        fpu.ftw |= (!env->fptags[i]) << i;
 985    }
 986
 987    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
 988    for (i = 0; i < 8; i++) {
 989        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
 990        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
 991        if (CPU_NB_REGS > 8) {
 992            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
 993            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
 994        }
 995    }
 996
 997    fpu.mxcsr = env->mxcsr;
 998
 999    return hax_sync_fpu(env, &fpu, 1);
1000}
1001
1002static int hax_arch_get_registers(CPUArchState *env)
1003{
1004    int ret;
1005
1006    ret = hax_sync_vcpu_register(env, 0);
1007    if (ret < 0) {
1008        return ret;
1009    }
1010
1011    ret = hax_get_fpu(env);
1012    if (ret < 0) {
1013        return ret;
1014    }
1015
1016    ret = hax_get_msrs(env);
1017    if (ret < 0) {
1018        return ret;
1019    }
1020
1021    x86_update_hflags(env);
1022    return 0;
1023}
1024
1025static int hax_arch_set_registers(CPUArchState *env)
1026{
1027    int ret;
1028    ret = hax_sync_vcpu_register(env, 1);
1029
1030    if (ret < 0) {
1031        fprintf(stderr, "Failed to sync vcpu reg\n");
1032        return ret;
1033    }
1034    ret = hax_set_fpu(env);
1035    if (ret < 0) {
1036        fprintf(stderr, "FPU failed\n");
1037        return ret;
1038    }
1039    ret = hax_set_msrs(env);
1040    if (ret < 0) {
1041        fprintf(stderr, "MSR failed\n");
1042        return ret;
1043    }
1044
1045    return 0;
1046}
1047
1048static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1049{
1050    if (hax_enabled()) {
1051        if (modified) {
1052            hax_arch_set_registers(env);
1053        } else {
1054            hax_arch_get_registers(env);
1055        }
1056    }
1057}
1058
1059/*
1060 * much simpler than kvm, at least in first stage because:
1061 * We don't need consider the device pass-through, we don't need
1062 * consider the framebuffer, and we may even remove the bios at all
1063 */
1064int hax_sync_vcpus(void)
1065{
1066    if (hax_enabled()) {
1067        CPUState *cpu;
1068
1069        cpu = first_cpu;
1070        if (!cpu) {
1071            return 0;
1072        }
1073
1074        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1075            int ret;
1076
1077            ret = hax_arch_set_registers(cpu->env_ptr);
1078            if (ret < 0) {
1079                return ret;
1080            }
1081        }
1082    }
1083
1084    return 0;
1085}
1086
1087void hax_reset_vcpu_state(void *opaque)
1088{
1089    CPUState *cpu;
1090    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1091        cpu->hax_vcpu->tunnel->user_event_pending = 0;
1092        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1093    }
1094}
1095
1096static void hax_accel_class_init(ObjectClass *oc, void *data)
1097{
1098    AccelClass *ac = ACCEL_CLASS(oc);
1099    ac->name = "HAX";
1100    ac->init_machine = hax_accel_init;
1101    ac->allowed = &hax_allowed;
1102}
1103
1104static const TypeInfo hax_accel_type = {
1105    .name = ACCEL_CLASS_NAME("hax"),
1106    .parent = TYPE_ACCEL,
1107    .class_init = hax_accel_class_init,
1108};
1109
1110static void hax_type_init(void)
1111{
1112    type_register_static(&hax_accel_type);
1113}
1114
1115type_init(hax_type_init);
1116