qemu/target/i386/hax-all.c
<<
>>
Prefs
   1/*
   2 * QEMU HAX support
   3 *
   4 * Copyright IBM, Corp. 2008
   5 *           Red Hat, Inc. 2008
   6 *
   7 * Authors:
   8 *  Anthony Liguori   <aliguori@us.ibm.com>
   9 *  Glauber Costa     <gcosta@redhat.com>
  10 *
  11 * Copyright (c) 2011 Intel Corporation
  12 *  Written by:
  13 *  Jiang Yunhong<yunhong.jiang@intel.com>
  14 *  Xin Xiaohui<xiaohui.xin@intel.com>
  15 *  Zhang Xiantao<xiantao.zhang@intel.com>
  16 *
  17 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18 * See the COPYING file in the top-level directory.
  19 *
  20 */
  21
  22/*
  23 * HAX common code for both windows and darwin
  24 */
  25
  26#include "qemu/osdep.h"
  27#include "cpu.h"
  28#include "exec/address-spaces.h"
  29#include "exec/exec-all.h"
  30#include "exec/ioport.h"
  31
  32#include "qemu-common.h"
  33#include "strings.h"
  34#include "hax-i386.h"
  35#include "sysemu/accel.h"
  36#include "sysemu/sysemu.h"
  37#include "qemu/main-loop.h"
  38#include "hw/boards.h"
  39
/* Compile-time switch for the DPRINTF() traces below; 0 disables them. */
#define DEBUG_HAX 0

/*
 * Debug trace helper: forwards its arguments to fprintf(stdout, ...) only
 * when DEBUG_HAX is non-zero.  The do { } while (0) wrapper lets the macro
 * be used as a single statement.
 */
#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/* Current version */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Minimum HAX kernel version */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

/* Flag returned by hax_enabled(); registered as the accel "allowed" flag. */
static bool hax_allowed;

/* Single global HAX state; reset and filled in by hax_init(). */
struct hax_state hax_global;

/* Forward declarations for helpers defined near the end of this file. */
static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);
  60
  61int hax_enabled(void)
  62{
  63    return hax_allowed;
  64}
  65
  66int valid_hax_tunnel_size(uint16_t size)
  67{
  68    return size >= sizeof(struct hax_tunnel);
  69}
  70
  71hax_fd hax_vcpu_get_fd(CPUArchState *env)
  72{
  73    struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
  74    if (!vcpu) {
  75        return HAX_INVALID_FD;
  76    }
  77    return vcpu->fd;
  78}
  79
  80static int hax_get_capability(struct hax_state *hax)
  81{
  82    int ret;
  83    struct hax_capabilityinfo capinfo, *cap = &capinfo;
  84
  85    ret = hax_capability(hax, cap);
  86    if (ret) {
  87        return ret;
  88    }
  89
  90    if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  91        if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  92            DPRINTF
  93                ("VTX feature is not enabled, HAX driver will not work.\n");
  94        } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  95            DPRINTF
  96                ("NX feature is not enabled, HAX driver will not work.\n");
  97        }
  98        return -ENXIO;
  99
 100    }
 101
 102    if (!(cap->winfo & HAX_CAP_UG)) {
 103        fprintf(stderr, "UG mode is not supported by the hardware.\n");
 104        return -ENOTSUP;
 105    }
 106
 107    if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 108        if (cap->mem_quota < hax->mem_quota) {
 109            fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 110            return -ENOSPC;
 111        }
 112    }
 113    return 0;
 114}
 115
 116static int hax_version_support(struct hax_state *hax)
 117{
 118    int ret;
 119    struct hax_module_version version;
 120
 121    ret = hax_mod_version(hax, &version);
 122    if (ret < 0) {
 123        return 0;
 124    }
 125
 126    if (hax_min_version > version.cur_version) {
 127        fprintf(stderr, "Incompatible HAX module version %d,",
 128                version.cur_version);
 129        fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 130        return 0;
 131    }
 132    if (hax_cur_version < version.compat_version) {
 133        fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 134                hax_cur_version);
 135        fprintf(stderr, "requires minimum HAX API version %x\n",
 136                version.compat_version);
 137        return 0;
 138    }
 139
 140    return 1;
 141}
 142
 143int hax_vcpu_create(int id)
 144{
 145    struct hax_vcpu_state *vcpu = NULL;
 146    int ret;
 147
 148    if (!hax_global.vm) {
 149        fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 150        return -1;
 151    }
 152
 153    if (hax_global.vm->vcpus[id]) {
 154        fprintf(stderr, "vcpu %x allocated already\n", id);
 155        return 0;
 156    }
 157
 158    vcpu = g_malloc(sizeof(struct hax_vcpu_state));
 159    if (!vcpu) {
 160        fprintf(stderr, "Failed to alloc vcpu state\n");
 161        return -ENOMEM;
 162    }
 163
 164    memset(vcpu, 0, sizeof(struct hax_vcpu_state));
 165
 166    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 167    if (ret) {
 168        fprintf(stderr, "Failed to create vcpu %x\n", id);
 169        goto error;
 170    }
 171
 172    vcpu->vcpu_id = id;
 173    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 174    if (hax_invalid_fd(vcpu->fd)) {
 175        fprintf(stderr, "Failed to open the vcpu\n");
 176        ret = -ENODEV;
 177        goto error;
 178    }
 179
 180    hax_global.vm->vcpus[id] = vcpu;
 181
 182    ret = hax_host_setup_vcpu_channel(vcpu);
 183    if (ret) {
 184        fprintf(stderr, "Invalid hax tunnel size\n");
 185        ret = -EINVAL;
 186        goto error;
 187    }
 188    return 0;
 189
 190  error:
 191    /* vcpu and tunnel will be closed automatically */
 192    if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 193        hax_close_fd(vcpu->fd);
 194    }
 195
 196    hax_global.vm->vcpus[id] = NULL;
 197    g_free(vcpu);
 198    return -1;
 199}
 200
 201int hax_vcpu_destroy(CPUState *cpu)
 202{
 203    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 204
 205    if (!hax_global.vm) {
 206        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 207        return -1;
 208    }
 209
 210    if (!vcpu) {
 211        return 0;
 212    }
 213
 214    /*
 215     * 1. The hax_tunnel is also destroied when vcpu destroy
 216     * 2. close fd will cause hax module vcpu be cleaned
 217     */
 218    hax_close_fd(vcpu->fd);
 219    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 220    g_free(vcpu);
 221    return 0;
 222}
 223
 224int hax_init_vcpu(CPUState *cpu)
 225{
 226    int ret;
 227
 228    ret = hax_vcpu_create(cpu->cpu_index);
 229    if (ret < 0) {
 230        fprintf(stderr, "Failed to create HAX vcpu\n");
 231        exit(-1);
 232    }
 233
 234    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 235    cpu->vcpu_dirty = true;
 236    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 237
 238    return ret;
 239}
 240
 241struct hax_vm *hax_vm_create(struct hax_state *hax)
 242{
 243    struct hax_vm *vm;
 244    int vm_id = 0, ret;
 245
 246    if (hax_invalid_fd(hax->fd)) {
 247        return NULL;
 248    }
 249
 250    if (hax->vm) {
 251        return hax->vm;
 252    }
 253
 254    vm = g_malloc(sizeof(struct hax_vm));
 255    if (!vm) {
 256        return NULL;
 257    }
 258    memset(vm, 0, sizeof(struct hax_vm));
 259    ret = hax_host_create_vm(hax, &vm_id);
 260    if (ret) {
 261        fprintf(stderr, "Failed to create vm %x\n", ret);
 262        goto error;
 263    }
 264    vm->id = vm_id;
 265    vm->fd = hax_host_open_vm(hax, vm_id);
 266    if (hax_invalid_fd(vm->fd)) {
 267        fprintf(stderr, "Failed to open vm %d\n", vm_id);
 268        goto error;
 269    }
 270
 271    hax->vm = vm;
 272    return vm;
 273
 274  error:
 275    g_free(vm);
 276    hax->vm = NULL;
 277    return NULL;
 278}
 279
 280int hax_vm_destroy(struct hax_vm *vm)
 281{
 282    int i;
 283
 284    for (i = 0; i < HAX_MAX_VCPU; i++)
 285        if (vm->vcpus[i]) {
 286            fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 287            return -1;
 288        }
 289    hax_close_fd(vm->fd);
 290    g_free(vm);
 291    hax_global.vm = NULL;
 292    return 0;
 293}
 294
 295static void hax_handle_interrupt(CPUState *cpu, int mask)
 296{
 297    cpu->interrupt_request |= mask;
 298
 299    if (!qemu_cpu_is_self(cpu)) {
 300        qemu_cpu_kick(cpu);
 301    }
 302}
 303
 304static int hax_init(ram_addr_t ram_size)
 305{
 306    struct hax_state *hax = NULL;
 307    struct hax_qemu_version qversion;
 308    int ret;
 309
 310    hax = &hax_global;
 311
 312    memset(hax, 0, sizeof(struct hax_state));
 313    hax->mem_quota = ram_size;
 314
 315    hax->fd = hax_mod_open();
 316    if (hax_invalid_fd(hax->fd)) {
 317        hax->fd = 0;
 318        ret = -ENODEV;
 319        goto error;
 320    }
 321
 322    ret = hax_get_capability(hax);
 323
 324    if (ret) {
 325        if (ret != -ENOSPC) {
 326            ret = -EINVAL;
 327        }
 328        goto error;
 329    }
 330
 331    if (!hax_version_support(hax)) {
 332        ret = -EINVAL;
 333        goto error;
 334    }
 335
 336    hax->vm = hax_vm_create(hax);
 337    if (!hax->vm) {
 338        fprintf(stderr, "Failed to create HAX VM\n");
 339        ret = -EINVAL;
 340        goto error;
 341    }
 342
 343    hax_memory_init();
 344
 345    qversion.cur_version = hax_cur_version;
 346    qversion.min_version = hax_min_version;
 347    hax_notify_qemu_version(hax->vm->fd, &qversion);
 348    cpu_interrupt_handler = hax_handle_interrupt;
 349
 350    return ret;
 351  error:
 352    if (hax->vm) {
 353        hax_vm_destroy(hax->vm);
 354    }
 355    if (hax->fd) {
 356        hax_mod_close(hax);
 357    }
 358
 359    return ret;
 360}
 361
 362static int hax_accel_init(MachineState *ms)
 363{
 364    int ret = hax_init(ms->ram_size);
 365
 366    if (ret && (ret != -ENOSPC)) {
 367        fprintf(stderr, "No accelerator found.\n");
 368    } else {
 369        fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 370                !ret ? "working" : "not working",
 371                !ret ? "fast virt" : "emulation");
 372    }
 373    return ret;
 374}
 375
 376static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 377{
 378    if (hft->direction < 2) {
 379        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
 380                               hft->direction);
 381    } else {
 382        /*
 383         * HAX API v4 supports transferring data between two MMIO addresses,
 384         * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 385         *  hft->direction == 2: gpa ==> gpa2
 386         */
 387        uint64_t value;
 388        cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
 389        cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
 390    }
 391
 392    return 0;
 393}
 394
 395static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 396                         int direction, int size, int count, void *buffer)
 397{
 398    uint8_t *ptr;
 399    int i;
 400    MemTxAttrs attrs = { 0 };
 401
 402    if (!df) {
 403        ptr = (uint8_t *) buffer;
 404    } else {
 405        ptr = buffer + size * count - size;
 406    }
 407    for (i = 0; i < count; i++) {
 408        address_space_rw(&address_space_io, port, attrs,
 409                         ptr, size, direction == HAX_EXIT_IO_OUT);
 410        if (!df) {
 411            ptr += size;
 412        } else {
 413            ptr -= size;
 414        }
 415    }
 416
 417    return 0;
 418}
 419
 420static int hax_vcpu_interrupt(CPUArchState *env)
 421{
 422    CPUState *cpu = ENV_GET_CPU(env);
 423    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 424    struct hax_tunnel *ht = vcpu->tunnel;
 425
 426    /*
 427     * Try to inject an interrupt if the guest can accept it
 428     * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 429     */
 430    if (ht->ready_for_interrupt_injection &&
 431        (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 432        int irq;
 433
 434        irq = cpu_get_pic_interrupt(env);
 435        if (irq >= 0) {
 436            hax_inject_interrupt(env, irq);
 437            cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 438        }
 439    }
 440
 441    /* If we have an interrupt but the guest is not ready to receive an
 442     * interrupt, request an interrupt window exit.  This will
 443     * cause a return to userspace as soon as the guest is ready to
 444     * receive interrupts. */
 445    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 446        ht->request_interrupt_window = 1;
 447    } else {
 448        ht->request_interrupt_window = 0;
 449    }
 450    return 0;
 451}
 452
 453void hax_raise_event(CPUState *cpu)
 454{
 455    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 456
 457    if (!vcpu) {
 458        return;
 459    }
 460    vcpu->tunnel->user_event_pending = 1;
 461}
 462
/*
 * Ask the HAX kernel module to run the CPU for us until:
 * 1. The guest crashes or shuts down
 * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 *
 * Returns non-zero only when the loop ended with a negative status
 * (unknown VMX exit, MMIO or real-mode emulation exits); the caller
 * hax_smp_cpu_exec() treats that as fatal.  Returns 0 otherwise, including
 * when HAX is not enabled.
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = ENV_GET_CPU(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    cpu->halted = 0;

    /* Handle a pending APIC poll request before entering the guest. */
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* INIT: reset the CPU in QEMU, then push the new state to HAX. */
    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    /* SIPI: pull state from HAX, apply the SIPI, push the result back. */
    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    /* Run the guest until an exit sets ret to a non-zero value. */
    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        /* The iothread lock is dropped while the vcpu runs. */
        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        /* Any other negative result from the run call is fatal. */
        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
            abort();
        }
        /* Dispatch on the exit status the module left in the tunnel. */
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                            ht->pio._direction,
                            ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            /* Negative: reported to the caller as fatal. */
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            /* With a hard interrupt or NMI pending, just re-enter the guest. */
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /* hlt instruction with interrupt disabled is shutdown */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these situations will continue to hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            /* Unlike HAX_EXIT_UNKNOWN_VMEXIT, this ends the loop non-fatally. */
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    /* Acknowledge an exit request and report it as EXCP_INTERRUPT. */
    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}
 596
 597static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 598{
 599    CPUArchState *env = cpu->env_ptr;
 600
 601    hax_arch_get_registers(env);
 602    cpu->vcpu_dirty = true;
 603}
 604
 605void hax_cpu_synchronize_state(CPUState *cpu)
 606{
 607    if (!cpu->vcpu_dirty) {
 608        run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 609    }
 610}
 611
 612static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 613                                              run_on_cpu_data arg)
 614{
 615    CPUArchState *env = cpu->env_ptr;
 616
 617    hax_vcpu_sync_state(env, 1);
 618    cpu->vcpu_dirty = false;
 619}
 620
 621void hax_cpu_synchronize_post_reset(CPUState *cpu)
 622{
 623    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 624}
 625
 626static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 627{
 628    CPUArchState *env = cpu->env_ptr;
 629
 630    hax_vcpu_sync_state(env, 1);
 631    cpu->vcpu_dirty = false;
 632}
 633
 634void hax_cpu_synchronize_post_init(CPUState *cpu)
 635{
 636    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 637}
 638
 639static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
 640{
 641    cpu->vcpu_dirty = true;
 642}
 643
 644void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
 645{
 646    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
 647}
 648
 649int hax_smp_cpu_exec(CPUState *cpu)
 650{
 651    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 652    int fatal;
 653    int ret;
 654
 655    while (1) {
 656        if (cpu->exception_index >= EXCP_INTERRUPT) {
 657            ret = cpu->exception_index;
 658            cpu->exception_index = -1;
 659            break;
 660        }
 661
 662        fatal = hax_vcpu_hax_exec(env);
 663
 664        if (fatal) {
 665            fprintf(stderr, "Unsupported HAX vcpu return\n");
 666            abort();
 667        }
 668    }
 669
 670    return ret;
 671}
 672
 673static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 674{
 675    memset(lhs, 0, sizeof(struct segment_desc_t));
 676    lhs->selector = rhs->selector;
 677    lhs->base = rhs->base;
 678    lhs->limit = rhs->limit;
 679    lhs->type = 3;
 680    lhs->present = 1;
 681    lhs->dpl = 3;
 682    lhs->operand_size = 0;
 683    lhs->desc = 1;
 684    lhs->long_mode = 0;
 685    lhs->granularity = 0;
 686    lhs->available = 0;
 687}
 688
 689static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 690{
 691    lhs->selector = rhs->selector;
 692    lhs->base = rhs->base;
 693    lhs->limit = rhs->limit;
 694    lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 695        | (rhs->present * DESC_P_MASK)
 696        | (rhs->dpl << DESC_DPL_SHIFT)
 697        | (rhs->operand_size << DESC_B_SHIFT)
 698        | (rhs->desc * DESC_S_MASK)
 699        | (rhs->long_mode << DESC_L_SHIFT)
 700        | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 701}
 702
 703static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 704{
 705    unsigned flags = rhs->flags;
 706
 707    memset(lhs, 0, sizeof(struct segment_desc_t));
 708    lhs->selector = rhs->selector;
 709    lhs->base = rhs->base;
 710    lhs->limit = rhs->limit;
 711    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 712    lhs->present = (flags & DESC_P_MASK) != 0;
 713    lhs->dpl = rhs->selector & 3;
 714    lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 715    lhs->desc = (flags & DESC_S_MASK) != 0;
 716    lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 717    lhs->granularity = (flags & DESC_G_MASK) != 0;
 718    lhs->available = (flags & DESC_AVL_MASK) != 0;
 719}
 720
 721static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 722{
 723    target_ulong reg = *hax_reg;
 724
 725    if (set) {
 726        *hax_reg = *qemu_reg;
 727    } else {
 728        *qemu_reg = reg;
 729    }
 730}
 731
 732/* The sregs has been synced with HAX kernel already before this call */
 733static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 734{
 735    get_seg(&env->segs[R_CS], &sregs->_cs);
 736    get_seg(&env->segs[R_DS], &sregs->_ds);
 737    get_seg(&env->segs[R_ES], &sregs->_es);
 738    get_seg(&env->segs[R_FS], &sregs->_fs);
 739    get_seg(&env->segs[R_GS], &sregs->_gs);
 740    get_seg(&env->segs[R_SS], &sregs->_ss);
 741
 742    get_seg(&env->tr, &sregs->_tr);
 743    get_seg(&env->ldt, &sregs->_ldt);
 744    env->idt.limit = sregs->_idt.limit;
 745    env->idt.base = sregs->_idt.base;
 746    env->gdt.limit = sregs->_gdt.limit;
 747    env->gdt.base = sregs->_gdt.base;
 748    return 0;
 749}
 750
 751static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 752{
 753    if ((env->eflags & VM_MASK)) {
 754        set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 755        set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 756        set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 757        set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 758        set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 759        set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 760    } else {
 761        set_seg(&sregs->_cs, &env->segs[R_CS]);
 762        set_seg(&sregs->_ds, &env->segs[R_DS]);
 763        set_seg(&sregs->_es, &env->segs[R_ES]);
 764        set_seg(&sregs->_fs, &env->segs[R_FS]);
 765        set_seg(&sregs->_gs, &env->segs[R_GS]);
 766        set_seg(&sregs->_ss, &env->segs[R_SS]);
 767
 768        if (env->cr[0] & CR0_PE_MASK) {
 769            /* force ss cpl to cs cpl */
 770            sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 771                                  (sregs->_cs.selector & 3);
 772            sregs->_ss.dpl = sregs->_ss.selector & 3;
 773        }
 774    }
 775
 776    set_seg(&sregs->_tr, &env->tr);
 777    set_seg(&sregs->_ldt, &env->ldt);
 778    sregs->_idt.limit = env->idt.limit;
 779    sregs->_idt.base = env->idt.base;
 780    sregs->_gdt.limit = env->gdt.limit;
 781    sregs->_gdt.base = env->gdt.base;
 782    return 0;
 783}
 784
/*
 * After the state has been fetched from the kernel module, some QEMU
 * emulator state needs to be updated as well: this recomputes the derived
 * bits of env->hflags from CS/SS flags, CR0, CR4, EFER and EFLAGS.
 * Always returns 0.
 */
static int hax_setup_qemu_emulator(CPUArchState *env)
{

/*
 * Bits of the old env->hflags that are kept as they are; every flag listed
 * inside the ~( ... ) is recomputed below instead.
 */
#define HFLAG_COPY_MASK (~( \
  HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
  HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
  HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
  HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK))

    uint32_t hflags;

    /* CPL comes from the DPL field of the CS flags. */
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    /* PE, then MP/EM/TS, are moved from their CR0 positions. */
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
        (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    /* TF, VM and IOPL are taken from EFLAGS using the HF_* masks directly. */
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    /* OSFXSR is moved from its CR4 position. */
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        /* LMA set and CS has the L bit: CS32, SS32 and CS64 are all set. */
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        /* Otherwise CS32/SS32 mirror the B bit of CS and SS. */
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
            (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
            (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) || !(hflags & HF_CS32_MASK)) {
            /* PE clear, VM set, or CS32 clear: ADDSEG is always set. */
            hflags |= HF_ADDSEG_MASK;
        } else {
            /* ADDSEG is set only if DS, ES or SS has a non-zero base. */
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
        }
    }

    hflags &= ~HF_SMM_MASK;

    /* Merge: preserved bits from the old hflags plus the recomputed ones. */
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
    return 0;
}
 834
 835static int hax_sync_vcpu_register(CPUArchState *env, int set)
 836{
 837    struct vcpu_state_t regs;
 838    int ret;
 839    memset(&regs, 0, sizeof(struct vcpu_state_t));
 840
 841    if (!set) {
 842        ret = hax_sync_vcpu_state(env, &regs, 0);
 843        if (ret < 0) {
 844            return -1;
 845        }
 846    }
 847
 848    /* generic register */
 849    hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 850    hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 851    hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 852    hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 853    hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 854    hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 855    hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 856    hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 857#ifdef TARGET_X86_64
 858    hax_getput_reg(&regs._r8, &env->regs[8], set);
 859    hax_getput_reg(&regs._r9, &env->regs[9], set);
 860    hax_getput_reg(&regs._r10, &env->regs[10], set);
 861    hax_getput_reg(&regs._r11, &env->regs[11], set);
 862    hax_getput_reg(&regs._r12, &env->regs[12], set);
 863    hax_getput_reg(&regs._r13, &env->regs[13], set);
 864    hax_getput_reg(&regs._r14, &env->regs[14], set);
 865    hax_getput_reg(&regs._r15, &env->regs[15], set);
 866#endif
 867    hax_getput_reg(&regs._rflags, &env->eflags, set);
 868    hax_getput_reg(&regs._rip, &env->eip, set);
 869
 870    if (set) {
 871        regs._cr0 = env->cr[0];
 872        regs._cr2 = env->cr[2];
 873        regs._cr3 = env->cr[3];
 874        regs._cr4 = env->cr[4];
 875        hax_set_segments(env, &regs);
 876    } else {
 877        env->cr[0] = regs._cr0;
 878        env->cr[2] = regs._cr2;
 879        env->cr[3] = regs._cr3;
 880        env->cr[4] = regs._cr4;
 881        hax_get_segments(env, &regs);
 882    }
 883
 884    if (set) {
 885        ret = hax_sync_vcpu_state(env, &regs, 1);
 886        if (ret < 0) {
 887            return -1;
 888        }
 889    }
 890    if (!set) {
 891        hax_setup_qemu_emulator(env);
 892    }
 893    return 0;
 894}
 895
 896static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 897                              uint64_t value)
 898{
 899    item->entry = index;
 900    item->value = value;
 901}
 902
 903static int hax_get_msrs(CPUArchState *env)
 904{
 905    struct hax_msr_data md;
 906    struct vmx_msr *msrs = md.entries;
 907    int ret, i, n;
 908
 909    n = 0;
 910    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 911    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 912    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 913    msrs[n++].entry = MSR_IA32_TSC;
 914#ifdef TARGET_X86_64
 915    msrs[n++].entry = MSR_EFER;
 916    msrs[n++].entry = MSR_STAR;
 917    msrs[n++].entry = MSR_LSTAR;
 918    msrs[n++].entry = MSR_CSTAR;
 919    msrs[n++].entry = MSR_FMASK;
 920    msrs[n++].entry = MSR_KERNELGSBASE;
 921#endif
 922    md.nr_msr = n;
 923    ret = hax_sync_msr(env, &md, 0);
 924    if (ret < 0) {
 925        return ret;
 926    }
 927
 928    for (i = 0; i < md.done; i++) {
 929        switch (msrs[i].entry) {
 930        case MSR_IA32_SYSENTER_CS:
 931            env->sysenter_cs = msrs[i].value;
 932            break;
 933        case MSR_IA32_SYSENTER_ESP:
 934            env->sysenter_esp = msrs[i].value;
 935            break;
 936        case MSR_IA32_SYSENTER_EIP:
 937            env->sysenter_eip = msrs[i].value;
 938            break;
 939        case MSR_IA32_TSC:
 940            env->tsc = msrs[i].value;
 941            break;
 942#ifdef TARGET_X86_64
 943        case MSR_EFER:
 944            env->efer = msrs[i].value;
 945            break;
 946        case MSR_STAR:
 947            env->star = msrs[i].value;
 948            break;
 949        case MSR_LSTAR:
 950            env->lstar = msrs[i].value;
 951            break;
 952        case MSR_CSTAR:
 953            env->cstar = msrs[i].value;
 954            break;
 955        case MSR_FMASK:
 956            env->fmask = msrs[i].value;
 957            break;
 958        case MSR_KERNELGSBASE:
 959            env->kernelgsbase = msrs[i].value;
 960            break;
 961#endif
 962        }
 963    }
 964
 965    return 0;
 966}
 967
 968static int hax_set_msrs(CPUArchState *env)
 969{
 970    struct hax_msr_data md;
 971    struct vmx_msr *msrs;
 972    msrs = md.entries;
 973    int n = 0;
 974
 975    memset(&md, 0, sizeof(struct hax_msr_data));
 976    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 977    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 978    hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 979    hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 980#ifdef TARGET_X86_64
 981    hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 982    hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 983    hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 984    hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 985    hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 986    hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 987#endif
 988    md.nr_msr = n;
 989    md.done = 0;
 990
 991    return hax_sync_msr(env, &md, 1);
 992}
 993
 994static int hax_get_fpu(CPUArchState *env)
 995{
 996    struct fx_layout fpu;
 997    int i, ret;
 998
 999    ret = hax_sync_fpu(env, &fpu, 0);
1000    if (ret < 0) {
1001        return ret;
1002    }
1003
1004    env->fpstt = (fpu.fsw >> 11) & 7;
1005    env->fpus = fpu.fsw;
1006    env->fpuc = fpu.fcw;
1007    for (i = 0; i < 8; ++i) {
1008        env->fptags[i] = !((fpu.ftw >> i) & 1);
1009    }
1010    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
1011
1012    for (i = 0; i < 8; i++) {
1013        env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
1014        env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
1015        if (CPU_NB_REGS > 8) {
1016            env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
1017            env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
1018        }
1019    }
1020    env->mxcsr = fpu.mxcsr;
1021
1022    return 0;
1023}
1024
1025static int hax_set_fpu(CPUArchState *env)
1026{
1027    struct fx_layout fpu;
1028    int i;
1029
1030    memset(&fpu, 0, sizeof(fpu));
1031    fpu.fsw = env->fpus & ~(7 << 11);
1032    fpu.fsw |= (env->fpstt & 7) << 11;
1033    fpu.fcw = env->fpuc;
1034
1035    for (i = 0; i < 8; ++i) {
1036        fpu.ftw |= (!env->fptags[i]) << i;
1037    }
1038
1039    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1040    for (i = 0; i < 8; i++) {
1041        stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1042        stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1043        if (CPU_NB_REGS > 8) {
1044            stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1045            stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1046        }
1047    }
1048
1049    fpu.mxcsr = env->mxcsr;
1050
1051    return hax_sync_fpu(env, &fpu, 1);
1052}
1053
1054static int hax_arch_get_registers(CPUArchState *env)
1055{
1056    int ret;
1057
1058    ret = hax_sync_vcpu_register(env, 0);
1059    if (ret < 0) {
1060        return ret;
1061    }
1062
1063    ret = hax_get_fpu(env);
1064    if (ret < 0) {
1065        return ret;
1066    }
1067
1068    ret = hax_get_msrs(env);
1069    if (ret < 0) {
1070        return ret;
1071    }
1072
1073    return 0;
1074}
1075
1076static int hax_arch_set_registers(CPUArchState *env)
1077{
1078    int ret;
1079    ret = hax_sync_vcpu_register(env, 1);
1080
1081    if (ret < 0) {
1082        fprintf(stderr, "Failed to sync vcpu reg\n");
1083        return ret;
1084    }
1085    ret = hax_set_fpu(env);
1086    if (ret < 0) {
1087        fprintf(stderr, "FPU failed\n");
1088        return ret;
1089    }
1090    ret = hax_set_msrs(env);
1091    if (ret < 0) {
1092        fprintf(stderr, "MSR failed\n");
1093        return ret;
1094    }
1095
1096    return 0;
1097}
1098
1099static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1100{
1101    if (hax_enabled()) {
1102        if (modified) {
1103            hax_arch_set_registers(env);
1104        } else {
1105            hax_arch_get_registers(env);
1106        }
1107    }
1108}
1109
1110/*
1111 * much simpler than kvm, at least in first stage because:
1112 * We don't need consider the device pass-through, we don't need
1113 * consider the framebuffer, and we may even remove the bios at all
1114 */
1115int hax_sync_vcpus(void)
1116{
1117    if (hax_enabled()) {
1118        CPUState *cpu;
1119
1120        cpu = first_cpu;
1121        if (!cpu) {
1122            return 0;
1123        }
1124
1125        for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1126            int ret;
1127
1128            ret = hax_arch_set_registers(cpu->env_ptr);
1129            if (ret < 0) {
1130                return ret;
1131            }
1132        }
1133    }
1134
1135    return 0;
1136}
1137
1138void hax_reset_vcpu_state(void *opaque)
1139{
1140    CPUState *cpu;
1141    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1142        cpu->hax_vcpu->tunnel->user_event_pending = 0;
1143        cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1144    }
1145}
1146
1147static void hax_accel_class_init(ObjectClass *oc, void *data)
1148{
1149    AccelClass *ac = ACCEL_CLASS(oc);
1150    ac->name = "HAX";
1151    ac->init_machine = hax_accel_init;
1152    ac->allowed = &hax_allowed;
1153}
1154
/*
 * Type description for the "hax" accelerator: a TYPE_ACCEL subclass whose
 * class is set up by hax_accel_class_init().
 */
static const TypeInfo hax_accel_type = {
    .name = ACCEL_CLASS_NAME("hax"),
    .parent = TYPE_ACCEL,
    .class_init = hax_accel_class_init,
};
1160
/* Register the HAX accelerator type description with the type system. */
static void hax_type_init(void)
{
    type_register_static(&hax_accel_type);
}

/* Hook hax_type_init() into the type_init() registration mechanism. */
type_init(hax_type_init);
1167