qemu/target/arm/kvm.c
<<
>>
Prefs
   1/*
   2 * ARM implementation of KVM hooks
   3 *
   4 * Copyright Christoffer Dall 2009-2010
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#include "qemu/osdep.h"
  12#include <sys/ioctl.h>
  13
  14#include <linux/kvm.h>
  15
  16#include "qemu-common.h"
  17#include "qemu/timer.h"
  18#include "qemu/error-report.h"
  19#include "sysemu/sysemu.h"
  20#include "sysemu/kvm.h"
  21#include "kvm_arm.h"
  22#include "cpu.h"
  23#include "internals.h"
  24#include "hw/arm/arm.h"
  25#include "exec/memattrs.h"
  26#include "exec/address-spaces.h"
  27#include "hw/boards.h"
  28#include "qemu/log.h"
  29
  30const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  31    KVM_CAP_LAST_INFO
  32};
  33
  34static bool cap_has_mp_state;
  35
  36int kvm_arm_vcpu_init(CPUState *cs)
  37{
  38    ARMCPU *cpu = ARM_CPU(cs);
  39    struct kvm_vcpu_init init;
  40
  41    init.target = cpu->kvm_target;
  42    memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
  43
  44    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
  45}
  46
  47bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
  48                                      int *fdarray,
  49                                      struct kvm_vcpu_init *init)
  50{
  51    int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
  52
  53    kvmfd = qemu_open("/dev/kvm", O_RDWR);
  54    if (kvmfd < 0) {
  55        goto err;
  56    }
  57    vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
  58    if (vmfd < 0) {
  59        goto err;
  60    }
  61    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
  62    if (cpufd < 0) {
  63        goto err;
  64    }
  65
  66    if (!init) {
  67        /* Caller doesn't want the VCPU to be initialized, so skip it */
  68        goto finish;
  69    }
  70
  71    ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
  72    if (ret >= 0) {
  73        ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
  74        if (ret < 0) {
  75            goto err;
  76        }
  77    } else if (cpus_to_try) {
  78        /* Old kernel which doesn't know about the
  79         * PREFERRED_TARGET ioctl: we know it will only support
  80         * creating one kind of guest CPU which is its preferred
  81         * CPU type.
  82         */
  83        while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
  84            init->target = *cpus_to_try++;
  85            memset(init->features, 0, sizeof(init->features));
  86            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
  87            if (ret >= 0) {
  88                break;
  89            }
  90        }
  91        if (ret < 0) {
  92            goto err;
  93        }
  94    } else {
  95        /* Treat a NULL cpus_to_try argument the same as an empty
  96         * list, which means we will fail the call since this must
  97         * be an old kernel which doesn't support PREFERRED_TARGET.
  98         */
  99        goto err;
 100    }
 101
 102finish:
 103    fdarray[0] = kvmfd;
 104    fdarray[1] = vmfd;
 105    fdarray[2] = cpufd;
 106
 107    return true;
 108
 109err:
 110    if (cpufd >= 0) {
 111        close(cpufd);
 112    }
 113    if (vmfd >= 0) {
 114        close(vmfd);
 115    }
 116    if (kvmfd >= 0) {
 117        close(kvmfd);
 118    }
 119
 120    return false;
 121}
 122
 123void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
 124{
 125    int i;
 126
 127    for (i = 2; i >= 0; i--) {
 128        close(fdarray[i]);
 129    }
 130}
 131
 132static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data)
 133{
 134    ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc);
 135
 136    /* All we really need to set up for the 'host' CPU
 137     * is the feature bits -- we rely on the fact that the
 138     * various ID register values in ARMCPU are only used for
 139     * TCG CPUs.
 140     */
 141    if (!kvm_arm_get_host_cpu_features(ahcc)) {
 142        fprintf(stderr, "Failed to retrieve host CPU features!\n");
 143        abort();
 144    }
 145}
 146
 147static void kvm_arm_host_cpu_initfn(Object *obj)
 148{
 149    ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj);
 150    ARMCPU *cpu = ARM_CPU(obj);
 151    CPUARMState *env = &cpu->env;
 152
 153    cpu->kvm_target = ahcc->target;
 154    cpu->dtb_compatible = ahcc->dtb_compatible;
 155    env->features = ahcc->features;
 156}
 157
 158static const TypeInfo host_arm_cpu_type_info = {
 159    .name = TYPE_ARM_HOST_CPU,
 160#ifdef TARGET_AARCH64
 161    .parent = TYPE_AARCH64_CPU,
 162#else
 163    .parent = TYPE_ARM_CPU,
 164#endif
 165    .instance_init = kvm_arm_host_cpu_initfn,
 166    .class_init = kvm_arm_host_cpu_class_init,
 167    .class_size = sizeof(ARMHostCPUClass),
 168};
 169
 170int kvm_arch_init(MachineState *ms, KVMState *s)
 171{
 172    /* For ARM interrupt delivery is always asynchronous,
 173     * whether we are using an in-kernel VGIC or not.
 174     */
 175    kvm_async_interrupts_allowed = true;
 176
 177    /*
 178     * PSCI wakes up secondary cores, so we always need to
 179     * have vCPUs waiting in kernel space
 180     */
 181    kvm_halt_in_kernel_allowed = true;
 182
 183    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
 184
 185    type_register_static(&host_arm_cpu_type_info);
 186
 187    return 0;
 188}
 189
 190unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 191{
 192    return cpu->cpu_index;
 193}
 194
 195/* We track all the KVM devices which need their memory addresses
 196 * passing to the kernel in a list of these structures.
 197 * When board init is complete we run through the list and
 198 * tell the kernel the base addresses of the memory regions.
 199 * We use a MemoryListener to track mapping and unmapping of
 200 * the regions during board creation, so the board models don't
 201 * need to do anything special for the KVM case.
 202 */
 203typedef struct KVMDevice {
 204    struct kvm_arm_device_addr kda;
 205    struct kvm_device_attr kdattr;
 206    MemoryRegion *mr;
 207    QSLIST_ENTRY(KVMDevice) entries;
 208    int dev_fd;
 209} KVMDevice;
 210
 211static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
 212
 213static void kvm_arm_devlistener_add(MemoryListener *listener,
 214                                    MemoryRegionSection *section)
 215{
 216    KVMDevice *kd;
 217
 218    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
 219        if (section->mr == kd->mr) {
 220            kd->kda.addr = section->offset_within_address_space;
 221        }
 222    }
 223}
 224
 225static void kvm_arm_devlistener_del(MemoryListener *listener,
 226                                    MemoryRegionSection *section)
 227{
 228    KVMDevice *kd;
 229
 230    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
 231        if (section->mr == kd->mr) {
 232            kd->kda.addr = -1;
 233        }
 234    }
 235}
 236
 237static MemoryListener devlistener = {
 238    .region_add = kvm_arm_devlistener_add,
 239    .region_del = kvm_arm_devlistener_del,
 240};
 241
 242static void kvm_arm_set_device_addr(KVMDevice *kd)
 243{
 244    struct kvm_device_attr *attr = &kd->kdattr;
 245    int ret;
 246
 247    /* If the device control API is available and we have a device fd on the
 248     * KVMDevice struct, let's use the newer API
 249     */
 250    if (kd->dev_fd >= 0) {
 251        uint64_t addr = kd->kda.addr;
 252        attr->addr = (uintptr_t)&addr;
 253        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
 254    } else {
 255        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
 256    }
 257
 258    if (ret < 0) {
 259        fprintf(stderr, "Failed to set device address: %s\n",
 260                strerror(-ret));
 261        abort();
 262    }
 263}
 264
 265static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
 266{
 267    KVMDevice *kd, *tkd;
 268
 269    memory_listener_unregister(&devlistener);
 270    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
 271        if (kd->kda.addr != -1) {
 272            kvm_arm_set_device_addr(kd);
 273        }
 274        memory_region_unref(kd->mr);
 275        g_free(kd);
 276    }
 277}
 278
 279static Notifier notify = {
 280    .notify = kvm_arm_machine_init_done,
 281};
 282
 283void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
 284                             uint64_t attr, int dev_fd)
 285{
 286    KVMDevice *kd;
 287
 288    if (!kvm_irqchip_in_kernel()) {
 289        return;
 290    }
 291
 292    if (QSLIST_EMPTY(&kvm_devices_head)) {
 293        memory_listener_register(&devlistener, &address_space_memory);
 294        qemu_add_machine_init_done_notifier(&notify);
 295    }
 296    kd = g_new0(KVMDevice, 1);
 297    kd->mr = mr;
 298    kd->kda.id = devid;
 299    kd->kda.addr = -1;
 300    kd->kdattr.flags = 0;
 301    kd->kdattr.group = group;
 302    kd->kdattr.attr = attr;
 303    kd->dev_fd = dev_fd;
 304    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
 305    memory_region_ref(kd->mr);
 306}
 307
 308static int compare_u64(const void *a, const void *b)
 309{
 310    if (*(uint64_t *)a > *(uint64_t *)b) {
 311        return 1;
 312    }
 313    if (*(uint64_t *)a < *(uint64_t *)b) {
 314        return -1;
 315    }
 316    return 0;
 317}
 318
 319/* Initialize the CPUState's cpreg list according to the kernel's
 320 * definition of what CPU registers it knows about (and throw away
 321 * the previous TCG-created cpreg list).
 322 */
 323int kvm_arm_init_cpreg_list(ARMCPU *cpu)
 324{
 325    struct kvm_reg_list rl;
 326    struct kvm_reg_list *rlp;
 327    int i, ret, arraylen;
 328    CPUState *cs = CPU(cpu);
 329
 330    rl.n = 0;
 331    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
 332    if (ret != -E2BIG) {
 333        return ret;
 334    }
 335    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
 336    rlp->n = rl.n;
 337    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
 338    if (ret) {
 339        goto out;
 340    }
 341    /* Sort the list we get back from the kernel, since cpreg_tuples
 342     * must be in strictly ascending order.
 343     */
 344    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
 345
 346    for (i = 0, arraylen = 0; i < rlp->n; i++) {
 347        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
 348            continue;
 349        }
 350        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
 351        case KVM_REG_SIZE_U32:
 352        case KVM_REG_SIZE_U64:
 353            break;
 354        default:
 355            fprintf(stderr, "Can't handle size of register in kernel list\n");
 356            ret = -EINVAL;
 357            goto out;
 358        }
 359
 360        arraylen++;
 361    }
 362
 363    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
 364    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
 365    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
 366                                         arraylen);
 367    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
 368                                        arraylen);
 369    cpu->cpreg_array_len = arraylen;
 370    cpu->cpreg_vmstate_array_len = arraylen;
 371
 372    for (i = 0, arraylen = 0; i < rlp->n; i++) {
 373        uint64_t regidx = rlp->reg[i];
 374        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
 375            continue;
 376        }
 377        cpu->cpreg_indexes[arraylen] = regidx;
 378        arraylen++;
 379    }
 380    assert(cpu->cpreg_array_len == arraylen);
 381
 382    if (!write_kvmstate_to_list(cpu)) {
 383        /* Shouldn't happen unless kernel is inconsistent about
 384         * what registers exist.
 385         */
 386        fprintf(stderr, "Initial read of kernel register state failed\n");
 387        ret = -EINVAL;
 388        goto out;
 389    }
 390
 391out:
 392    g_free(rlp);
 393    return ret;
 394}
 395
 396bool write_kvmstate_to_list(ARMCPU *cpu)
 397{
 398    CPUState *cs = CPU(cpu);
 399    int i;
 400    bool ok = true;
 401
 402    for (i = 0; i < cpu->cpreg_array_len; i++) {
 403        struct kvm_one_reg r;
 404        uint64_t regidx = cpu->cpreg_indexes[i];
 405        uint32_t v32;
 406        int ret;
 407
 408        r.id = regidx;
 409
 410        switch (regidx & KVM_REG_SIZE_MASK) {
 411        case KVM_REG_SIZE_U32:
 412            r.addr = (uintptr_t)&v32;
 413            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
 414            if (!ret) {
 415                cpu->cpreg_values[i] = v32;
 416            }
 417            break;
 418        case KVM_REG_SIZE_U64:
 419            r.addr = (uintptr_t)(cpu->cpreg_values + i);
 420            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
 421            break;
 422        default:
 423            abort();
 424        }
 425        if (ret) {
 426            ok = false;
 427        }
 428    }
 429    return ok;
 430}
 431
 432bool write_list_to_kvmstate(ARMCPU *cpu, int level)
 433{
 434    CPUState *cs = CPU(cpu);
 435    int i;
 436    bool ok = true;
 437
 438    for (i = 0; i < cpu->cpreg_array_len; i++) {
 439        struct kvm_one_reg r;
 440        uint64_t regidx = cpu->cpreg_indexes[i];
 441        uint32_t v32;
 442        int ret;
 443
 444        if (kvm_arm_cpreg_level(regidx) > level) {
 445            continue;
 446        }
 447
 448        r.id = regidx;
 449        switch (regidx & KVM_REG_SIZE_MASK) {
 450        case KVM_REG_SIZE_U32:
 451            v32 = cpu->cpreg_values[i];
 452            r.addr = (uintptr_t)&v32;
 453            break;
 454        case KVM_REG_SIZE_U64:
 455            r.addr = (uintptr_t)(cpu->cpreg_values + i);
 456            break;
 457        default:
 458            abort();
 459        }
 460        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
 461        if (ret) {
 462            /* We might fail for "unknown register" and also for
 463             * "you tried to set a register which is constant with
 464             * a different value from what it actually contains".
 465             */
 466            ok = false;
 467        }
 468    }
 469    return ok;
 470}
 471
 472void kvm_arm_reset_vcpu(ARMCPU *cpu)
 473{
 474    int ret;
 475
 476    /* Re-init VCPU so that all registers are set to
 477     * their respective reset values.
 478     */
 479    ret = kvm_arm_vcpu_init(CPU(cpu));
 480    if (ret < 0) {
 481        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
 482        abort();
 483    }
 484    if (!write_kvmstate_to_list(cpu)) {
 485        fprintf(stderr, "write_kvmstate_to_list failed\n");
 486        abort();
 487    }
 488}
 489
 490/*
 491 * Update KVM's MP_STATE based on what QEMU thinks it is
 492 */
 493int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
 494{
 495    if (cap_has_mp_state) {
 496        struct kvm_mp_state mp_state = {
 497            .mp_state = (cpu->power_state == PSCI_OFF) ?
 498            KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
 499        };
 500        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
 501        if (ret) {
 502            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
 503                    __func__, ret, strerror(-ret));
 504            return -1;
 505        }
 506    }
 507
 508    return 0;
 509}
 510
 511/*
 512 * Sync the KVM MP_STATE into QEMU
 513 */
 514int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
 515{
 516    if (cap_has_mp_state) {
 517        struct kvm_mp_state mp_state;
 518        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
 519        if (ret) {
 520            fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
 521                    __func__, ret, strerror(-ret));
 522            abort();
 523        }
 524        cpu->power_state = (mp_state.mp_state == KVM_MP_STATE_STOPPED) ?
 525            PSCI_OFF : PSCI_ON;
 526    }
 527
 528    return 0;
 529}
 530
 531void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 532{
 533}
 534
 535MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
 536{
 537    ARMCPU *cpu;
 538    uint32_t switched_level;
 539
 540    if (kvm_irqchip_in_kernel()) {
 541        /*
 542         * We only need to sync timer states with user-space interrupt
 543         * controllers, so return early and save cycles if we don't.
 544         */
 545        return MEMTXATTRS_UNSPECIFIED;
 546    }
 547
 548    cpu = ARM_CPU(cs);
 549
 550    /* Synchronize our shadowed in-kernel device irq lines with the kvm ones */
 551    if (run->s.regs.device_irq_level != cpu->device_irq_level) {
 552        switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level;
 553
 554        qemu_mutex_lock_iothread();
 555
 556        if (switched_level & KVM_ARM_DEV_EL1_VTIMER) {
 557            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT],
 558                         !!(run->s.regs.device_irq_level &
 559                            KVM_ARM_DEV_EL1_VTIMER));
 560            switched_level &= ~KVM_ARM_DEV_EL1_VTIMER;
 561        }
 562
 563        if (switched_level & KVM_ARM_DEV_EL1_PTIMER) {
 564            qemu_set_irq(cpu->gt_timer_outputs[GTIMER_PHYS],
 565                         !!(run->s.regs.device_irq_level &
 566                            KVM_ARM_DEV_EL1_PTIMER));
 567            switched_level &= ~KVM_ARM_DEV_EL1_PTIMER;
 568        }
 569
 570        /* XXX PMU IRQ is missing */
 571
 572        if (switched_level) {
 573            qemu_log_mask(LOG_UNIMP, "%s: unhandled in-kernel device IRQ %x\n",
 574                          __func__, switched_level);
 575        }
 576
 577        /* We also mark unknown levels as processed to not waste cycles */
 578        cpu->device_irq_level = run->s.regs.device_irq_level;
 579        qemu_mutex_unlock_iothread();
 580    }
 581
 582    return MEMTXATTRS_UNSPECIFIED;
 583}
 584
 585
 586int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 587{
 588    int ret = 0;
 589
 590    switch (run->exit_reason) {
 591    case KVM_EXIT_DEBUG:
 592        if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
 593            ret = EXCP_DEBUG;
 594        } /* otherwise return to guest */
 595        break;
 596    default:
 597        qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
 598                      __func__, run->exit_reason);
 599        break;
 600    }
 601    return ret;
 602}
 603
 604bool kvm_arch_stop_on_emulation_error(CPUState *cs)
 605{
 606    return true;
 607}
 608
 609int kvm_arch_process_async_events(CPUState *cs)
 610{
 611    return 0;
 612}
 613
 614/* The #ifdef protections are until 32bit headers are imported and can
 615 * be removed once both 32 and 64 bit reach feature parity.
 616 */
 617void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
 618{
 619#ifdef KVM_GUESTDBG_USE_SW_BP
 620    if (kvm_sw_breakpoints_active(cs)) {
 621        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
 622    }
 623#endif
 624#ifdef KVM_GUESTDBG_USE_HW
 625    if (kvm_arm_hw_debug_active(cs)) {
 626        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
 627        kvm_arm_copy_hw_debug_data(&dbg->arch);
 628    }
 629#endif
 630}
 631
 632void kvm_arch_init_irq_routing(KVMState *s)
 633{
 634}
 635
 636int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 637{
 638     if (machine_kernel_irqchip_split(ms)) {
 639         perror("-machine kernel_irqchip=split is not supported on ARM.");
 640         exit(1);
 641    }
 642
 643    /* If we can create the VGIC using the newer device control API, we
 644     * let the device do this when it initializes itself, otherwise we
 645     * fall back to the old API */
 646    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
 647}
 648
 649int kvm_arm_vgic_probe(void)
 650{
 651    if (kvm_create_device(kvm_state,
 652                          KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
 653        return 3;
 654    } else if (kvm_create_device(kvm_state,
 655                                 KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
 656        return 2;
 657    } else {
 658        return 0;
 659    }
 660}
 661
 662int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
 663                             uint64_t address, uint32_t data, PCIDevice *dev)
 664{
 665    return 0;
 666}
 667
 668int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
 669                                int vector, PCIDevice *dev)
 670{
 671    return 0;
 672}
 673
 674int kvm_arch_release_virq_post(int virq)
 675{
 676    return 0;
 677}
 678
 679int kvm_arch_msi_data_to_gsi(uint32_t data)
 680{
 681    return (data - 32) & 0xffff;
 682}
 683