qemu/target-arm/kvm.c
<<
>>
Prefs
   1/*
   2 * ARM implementation of KVM hooks
   3 *
   4 * Copyright Christoffer Dall 2009-2010
   5 *
   6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
   7 * See the COPYING file in the top-level directory.
   8 *
   9 */
  10
  11#include "qemu/osdep.h"
  12#include <sys/ioctl.h>
  13
  14#include <linux/kvm.h>
  15
  16#include "qemu-common.h"
  17#include "qemu/timer.h"
  18#include "qemu/error-report.h"
  19#include "sysemu/sysemu.h"
  20#include "sysemu/kvm.h"
  21#include "kvm_arm.h"
  22#include "cpu.h"
  23#include "internals.h"
  24#include "hw/arm/arm.h"
  25#include "exec/memattrs.h"
  26#include "exec/address-spaces.h"
  27#include "hw/boards.h"
  28#include "qemu/log.h"
  29
  30const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  31    KVM_CAP_LAST_INFO
  32};
  33
  34static bool cap_has_mp_state;
  35
  36int kvm_arm_vcpu_init(CPUState *cs)
  37{
  38    ARMCPU *cpu = ARM_CPU(cs);
  39    struct kvm_vcpu_init init;
  40
  41    init.target = cpu->kvm_target;
  42    memcpy(init.features, cpu->kvm_init_features, sizeof(init.features));
  43
  44    return kvm_vcpu_ioctl(cs, KVM_ARM_VCPU_INIT, &init);
  45}
  46
  47bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
  48                                      int *fdarray,
  49                                      struct kvm_vcpu_init *init)
  50{
  51    int ret, kvmfd = -1, vmfd = -1, cpufd = -1;
  52
  53    kvmfd = qemu_open("/dev/kvm", O_RDWR);
  54    if (kvmfd < 0) {
  55        goto err;
  56    }
  57    vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
  58    if (vmfd < 0) {
  59        goto err;
  60    }
  61    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
  62    if (cpufd < 0) {
  63        goto err;
  64    }
  65
  66    if (!init) {
  67        /* Caller doesn't want the VCPU to be initialized, so skip it */
  68        goto finish;
  69    }
  70
  71    ret = ioctl(vmfd, KVM_ARM_PREFERRED_TARGET, init);
  72    if (ret >= 0) {
  73        ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
  74        if (ret < 0) {
  75            goto err;
  76        }
  77    } else if (cpus_to_try) {
  78        /* Old kernel which doesn't know about the
  79         * PREFERRED_TARGET ioctl: we know it will only support
  80         * creating one kind of guest CPU which is its preferred
  81         * CPU type.
  82         */
  83        while (*cpus_to_try != QEMU_KVM_ARM_TARGET_NONE) {
  84            init->target = *cpus_to_try++;
  85            memset(init->features, 0, sizeof(init->features));
  86            ret = ioctl(cpufd, KVM_ARM_VCPU_INIT, init);
  87            if (ret >= 0) {
  88                break;
  89            }
  90        }
  91        if (ret < 0) {
  92            goto err;
  93        }
  94    } else {
  95        /* Treat a NULL cpus_to_try argument the same as an empty
  96         * list, which means we will fail the call since this must
  97         * be an old kernel which doesn't support PREFERRED_TARGET.
  98         */
  99        goto err;
 100    }
 101
 102finish:
 103    fdarray[0] = kvmfd;
 104    fdarray[1] = vmfd;
 105    fdarray[2] = cpufd;
 106
 107    return true;
 108
 109err:
 110    if (cpufd >= 0) {
 111        close(cpufd);
 112    }
 113    if (vmfd >= 0) {
 114        close(vmfd);
 115    }
 116    if (kvmfd >= 0) {
 117        close(kvmfd);
 118    }
 119
 120    return false;
 121}
 122
 /* Close the three descriptors in @fdarray, highest index first
  * (index 2, then 1, then 0).
  */
 void kvm_arm_destroy_scratch_host_vcpu(int *fdarray)
 {
     int idx = 3;

     while (idx-- > 0) {
         close(fdarray[idx]);
     }
 }
 131
 132static void kvm_arm_host_cpu_class_init(ObjectClass *oc, void *data)
 133{
 134    ARMHostCPUClass *ahcc = ARM_HOST_CPU_CLASS(oc);
 135
 136    /* All we really need to set up for the 'host' CPU
 137     * is the feature bits -- we rely on the fact that the
 138     * various ID register values in ARMCPU are only used for
 139     * TCG CPUs.
 140     */
 141    if (!kvm_arm_get_host_cpu_features(ahcc)) {
 142        fprintf(stderr, "Failed to retrieve host CPU features!\n");
 143        abort();
 144    }
 145}
 146
 147static void kvm_arm_host_cpu_initfn(Object *obj)
 148{
 149    ARMHostCPUClass *ahcc = ARM_HOST_CPU_GET_CLASS(obj);
 150    ARMCPU *cpu = ARM_CPU(obj);
 151    CPUARMState *env = &cpu->env;
 152
 153    cpu->kvm_target = ahcc->target;
 154    cpu->dtb_compatible = ahcc->dtb_compatible;
 155    env->features = ahcc->features;
 156}
 157
 158static const TypeInfo host_arm_cpu_type_info = {
 159    .name = TYPE_ARM_HOST_CPU,
 160#ifdef TARGET_AARCH64
 161    .parent = TYPE_AARCH64_CPU,
 162#else
 163    .parent = TYPE_ARM_CPU,
 164#endif
 165    .instance_init = kvm_arm_host_cpu_initfn,
 166    .class_init = kvm_arm_host_cpu_class_init,
 167    .class_size = sizeof(ARMHostCPUClass),
 168};
 169
 170int kvm_arch_init(MachineState *ms, KVMState *s)
 171{
 172    /* For ARM interrupt delivery is always asynchronous,
 173     * whether we are using an in-kernel VGIC or not.
 174     */
 175    kvm_async_interrupts_allowed = true;
 176
 177    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
 178
 179    type_register_static(&host_arm_cpu_type_info);
 180
 181    return 0;
 182}
 183
 184unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 185{
 186    return cpu->cpu_index;
 187}
 188
 189/* We track all the KVM devices which need their memory addresses
 190 * passing to the kernel in a list of these structures.
 191 * When board init is complete we run through the list and
 192 * tell the kernel the base addresses of the memory regions.
 193 * We use a MemoryListener to track mapping and unmapping of
 194 * the regions during board creation, so the board models don't
 195 * need to do anything special for the KVM case.
 196 */
 197typedef struct KVMDevice {
 198    struct kvm_arm_device_addr kda;
 199    struct kvm_device_attr kdattr;
 200    MemoryRegion *mr;
 201    QSLIST_ENTRY(KVMDevice) entries;
 202    int dev_fd;
 203} KVMDevice;
 204
 205static QSLIST_HEAD(kvm_devices_head, KVMDevice) kvm_devices_head;
 206
 207static void kvm_arm_devlistener_add(MemoryListener *listener,
 208                                    MemoryRegionSection *section)
 209{
 210    KVMDevice *kd;
 211
 212    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
 213        if (section->mr == kd->mr) {
 214            kd->kda.addr = section->offset_within_address_space;
 215        }
 216    }
 217}
 218
 219static void kvm_arm_devlistener_del(MemoryListener *listener,
 220                                    MemoryRegionSection *section)
 221{
 222    KVMDevice *kd;
 223
 224    QSLIST_FOREACH(kd, &kvm_devices_head, entries) {
 225        if (section->mr == kd->mr) {
 226            kd->kda.addr = -1;
 227        }
 228    }
 229}
 230
 231static MemoryListener devlistener = {
 232    .region_add = kvm_arm_devlistener_add,
 233    .region_del = kvm_arm_devlistener_del,
 234};
 235
 236static void kvm_arm_set_device_addr(KVMDevice *kd)
 237{
 238    struct kvm_device_attr *attr = &kd->kdattr;
 239    int ret;
 240
 241    /* If the device control API is available and we have a device fd on the
 242     * KVMDevice struct, let's use the newer API
 243     */
 244    if (kd->dev_fd >= 0) {
 245        uint64_t addr = kd->kda.addr;
 246        attr->addr = (uintptr_t)&addr;
 247        ret = kvm_device_ioctl(kd->dev_fd, KVM_SET_DEVICE_ATTR, attr);
 248    } else {
 249        ret = kvm_vm_ioctl(kvm_state, KVM_ARM_SET_DEVICE_ADDR, &kd->kda);
 250    }
 251
 252    if (ret < 0) {
 253        fprintf(stderr, "Failed to set device address: %s\n",
 254                strerror(-ret));
 255        abort();
 256    }
 257}
 258
 259static void kvm_arm_machine_init_done(Notifier *notifier, void *data)
 260{
 261    KVMDevice *kd, *tkd;
 262
 263    memory_listener_unregister(&devlistener);
 264    QSLIST_FOREACH_SAFE(kd, &kvm_devices_head, entries, tkd) {
 265        if (kd->kda.addr != -1) {
 266            kvm_arm_set_device_addr(kd);
 267        }
 268        memory_region_unref(kd->mr);
 269        g_free(kd);
 270    }
 271}
 272
 273static Notifier notify = {
 274    .notify = kvm_arm_machine_init_done,
 275};
 276
 277void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group,
 278                             uint64_t attr, int dev_fd)
 279{
 280    KVMDevice *kd;
 281
 282    if (!kvm_irqchip_in_kernel()) {
 283        return;
 284    }
 285
 286    if (QSLIST_EMPTY(&kvm_devices_head)) {
 287        memory_listener_register(&devlistener, &address_space_memory);
 288        qemu_add_machine_init_done_notifier(&notify);
 289    }
 290    kd = g_new0(KVMDevice, 1);
 291    kd->mr = mr;
 292    kd->kda.id = devid;
 293    kd->kda.addr = -1;
 294    kd->kdattr.flags = 0;
 295    kd->kdattr.group = group;
 296    kd->kdattr.attr = attr;
 297    kd->dev_fd = dev_fd;
 298    QSLIST_INSERT_HEAD(&kvm_devices_head, kd, entries);
 299    memory_region_ref(kd->mr);
 300}
 301
 /* qsort() comparator for arrays of uint64_t.
  *
  * @a, @b: pointers to the two uint64_t elements being compared
  *
  * Returns -1, 0 or 1 when *a is respectively less than, equal to or
  * greater than *b. The elements are read through const-qualified
  * pointers so the comparator never casts away the const that qsort()
  * hands it.
  */
 static int compare_u64(const void *a, const void *b)
 {
     const uint64_t lhs = *(const uint64_t *)a;
     const uint64_t rhs = *(const uint64_t *)b;

     /* Comparison results are 0 or 1, so the difference is -1, 0 or 1
      * without any risk of overflow that subtracting the values has.
      */
     return (lhs > rhs) - (lhs < rhs);
 }
 312
 313/* Initialize the CPUState's cpreg list according to the kernel's
 314 * definition of what CPU registers it knows about (and throw away
 315 * the previous TCG-created cpreg list).
 316 */
 317int kvm_arm_init_cpreg_list(ARMCPU *cpu)
 318{
 319    struct kvm_reg_list rl;
 320    struct kvm_reg_list *rlp;
 321    int i, ret, arraylen;
 322    CPUState *cs = CPU(cpu);
 323
 324    rl.n = 0;
 325    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, &rl);
 326    if (ret != -E2BIG) {
 327        return ret;
 328    }
 329    rlp = g_malloc(sizeof(struct kvm_reg_list) + rl.n * sizeof(uint64_t));
 330    rlp->n = rl.n;
 331    ret = kvm_vcpu_ioctl(cs, KVM_GET_REG_LIST, rlp);
 332    if (ret) {
 333        goto out;
 334    }
 335    /* Sort the list we get back from the kernel, since cpreg_tuples
 336     * must be in strictly ascending order.
 337     */
 338    qsort(&rlp->reg, rlp->n, sizeof(rlp->reg[0]), compare_u64);
 339
 340    for (i = 0, arraylen = 0; i < rlp->n; i++) {
 341        if (!kvm_arm_reg_syncs_via_cpreg_list(rlp->reg[i])) {
 342            continue;
 343        }
 344        switch (rlp->reg[i] & KVM_REG_SIZE_MASK) {
 345        case KVM_REG_SIZE_U32:
 346        case KVM_REG_SIZE_U64:
 347            break;
 348        default:
 349            fprintf(stderr, "Can't handle size of register in kernel list\n");
 350            ret = -EINVAL;
 351            goto out;
 352        }
 353
 354        arraylen++;
 355    }
 356
 357    cpu->cpreg_indexes = g_renew(uint64_t, cpu->cpreg_indexes, arraylen);
 358    cpu->cpreg_values = g_renew(uint64_t, cpu->cpreg_values, arraylen);
 359    cpu->cpreg_vmstate_indexes = g_renew(uint64_t, cpu->cpreg_vmstate_indexes,
 360                                         arraylen);
 361    cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
 362                                        arraylen);
 363    cpu->cpreg_array_len = arraylen;
 364    cpu->cpreg_vmstate_array_len = arraylen;
 365
 366    for (i = 0, arraylen = 0; i < rlp->n; i++) {
 367        uint64_t regidx = rlp->reg[i];
 368        if (!kvm_arm_reg_syncs_via_cpreg_list(regidx)) {
 369            continue;
 370        }
 371        cpu->cpreg_indexes[arraylen] = regidx;
 372        arraylen++;
 373    }
 374    assert(cpu->cpreg_array_len == arraylen);
 375
 376    if (!write_kvmstate_to_list(cpu)) {
 377        /* Shouldn't happen unless kernel is inconsistent about
 378         * what registers exist.
 379         */
 380        fprintf(stderr, "Initial read of kernel register state failed\n");
 381        ret = -EINVAL;
 382        goto out;
 383    }
 384
 385out:
 386    g_free(rlp);
 387    return ret;
 388}
 389
 390bool write_kvmstate_to_list(ARMCPU *cpu)
 391{
 392    CPUState *cs = CPU(cpu);
 393    int i;
 394    bool ok = true;
 395
 396    for (i = 0; i < cpu->cpreg_array_len; i++) {
 397        struct kvm_one_reg r;
 398        uint64_t regidx = cpu->cpreg_indexes[i];
 399        uint32_t v32;
 400        int ret;
 401
 402        r.id = regidx;
 403
 404        switch (regidx & KVM_REG_SIZE_MASK) {
 405        case KVM_REG_SIZE_U32:
 406            r.addr = (uintptr_t)&v32;
 407            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
 408            if (!ret) {
 409                cpu->cpreg_values[i] = v32;
 410            }
 411            break;
 412        case KVM_REG_SIZE_U64:
 413            r.addr = (uintptr_t)(cpu->cpreg_values + i);
 414            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &r);
 415            break;
 416        default:
 417            abort();
 418        }
 419        if (ret) {
 420            ok = false;
 421        }
 422    }
 423    return ok;
 424}
 425
 426bool write_list_to_kvmstate(ARMCPU *cpu, int level)
 427{
 428    CPUState *cs = CPU(cpu);
 429    int i;
 430    bool ok = true;
 431
 432    for (i = 0; i < cpu->cpreg_array_len; i++) {
 433        struct kvm_one_reg r;
 434        uint64_t regidx = cpu->cpreg_indexes[i];
 435        uint32_t v32;
 436        int ret;
 437
 438        if (kvm_arm_cpreg_level(regidx) > level) {
 439            continue;
 440        }
 441
 442        r.id = regidx;
 443        switch (regidx & KVM_REG_SIZE_MASK) {
 444        case KVM_REG_SIZE_U32:
 445            v32 = cpu->cpreg_values[i];
 446            r.addr = (uintptr_t)&v32;
 447            break;
 448        case KVM_REG_SIZE_U64:
 449            r.addr = (uintptr_t)(cpu->cpreg_values + i);
 450            break;
 451        default:
 452            abort();
 453        }
 454        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &r);
 455        if (ret) {
 456            /* We might fail for "unknown register" and also for
 457             * "you tried to set a register which is constant with
 458             * a different value from what it actually contains".
 459             */
 460            ok = false;
 461        }
 462    }
 463    return ok;
 464}
 465
 466void kvm_arm_reset_vcpu(ARMCPU *cpu)
 467{
 468    int ret;
 469
 470    /* Re-init VCPU so that all registers are set to
 471     * their respective reset values.
 472     */
 473    ret = kvm_arm_vcpu_init(CPU(cpu));
 474    if (ret < 0) {
 475        fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
 476        abort();
 477    }
 478    if (!write_kvmstate_to_list(cpu)) {
 479        fprintf(stderr, "write_kvmstate_to_list failed\n");
 480        abort();
 481    }
 482}
 483
 484/*
 485 * Update KVM's MP_STATE based on what QEMU thinks it is
 486 */
 487int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
 488{
 489    if (cap_has_mp_state) {
 490        struct kvm_mp_state mp_state = {
 491            .mp_state =
 492            cpu->powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
 493        };
 494        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
 495        if (ret) {
 496            fprintf(stderr, "%s: failed to set MP_STATE %d/%s\n",
 497                    __func__, ret, strerror(-ret));
 498            return -1;
 499        }
 500    }
 501
 502    return 0;
 503}
 504
 505/*
 506 * Sync the KVM MP_STATE into QEMU
 507 */
 508int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
 509{
 510    if (cap_has_mp_state) {
 511        struct kvm_mp_state mp_state;
 512        int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, &mp_state);
 513        if (ret) {
 514            fprintf(stderr, "%s: failed to get MP_STATE %d/%s\n",
 515                    __func__, ret, strerror(-ret));
 516            abort();
 517        }
 518        cpu->powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED);
 519    }
 520
 521    return 0;
 522}
 523
 524void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 525{
 526}
 527
 528MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
 529{
 530    return MEMTXATTRS_UNSPECIFIED;
 531}
 532
 533
 534int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 535{
 536    int ret = 0;
 537
 538    switch (run->exit_reason) {
 539    case KVM_EXIT_DEBUG:
 540        if (kvm_arm_handle_debug(cs, &run->debug.arch)) {
 541            ret = EXCP_DEBUG;
 542        } /* otherwise return to guest */
 543        break;
 544    default:
 545        qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
 546                      __func__, run->exit_reason);
 547        break;
 548    }
 549    return ret;
 550}
 551
 552bool kvm_arch_stop_on_emulation_error(CPUState *cs)
 553{
 554    return true;
 555}
 556
 557int kvm_arch_process_async_events(CPUState *cs)
 558{
 559    return 0;
 560}
 561
 562int kvm_arch_on_sigbus_vcpu(CPUState *cs, int code, void *addr)
 563{
 564    return 1;
 565}
 566
 /* SIGBUS hook: the ARM implementation inspects none of its arguments
  * and always returns 1.
  * NOTE(review): presumably 1 tells the caller the signal was not
  * handled -- confirm against the generic KVM code.
  */
 int kvm_arch_on_sigbus(int code, void *addr)
 {
     const int result = 1;

     return result;
 }
 571
 572/* The #ifdef protections are until 32bit headers are imported and can
 573 * be removed once both 32 and 64 bit reach feature parity.
 574 */
 575void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
 576{
 577#ifdef KVM_GUESTDBG_USE_SW_BP
 578    if (kvm_sw_breakpoints_active(cs)) {
 579        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
 580    }
 581#endif
 582#ifdef KVM_GUESTDBG_USE_HW
 583    if (kvm_arm_hw_debug_active(cs)) {
 584        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW;
 585        kvm_arm_copy_hw_debug_data(&dbg->arch);
 586    }
 587#endif
 588}
 589
 590void kvm_arch_init_irq_routing(KVMState *s)
 591{
 592}
 593
 594int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 595{
 596     if (machine_kernel_irqchip_split(ms)) {
 597         perror("-machine kernel_irqchip=split is not supported on ARM.");
 598         exit(1);
 599    }
 600
 601    /* If we can create the VGIC using the newer device control API, we
 602     * let the device do this when it initializes itself, otherwise we
 603     * fall back to the old API */
 604    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
 605}
 606
 607int kvm_arm_vgic_probe(void)
 608{
 609    if (kvm_create_device(kvm_state,
 610                          KVM_DEV_TYPE_ARM_VGIC_V3, true) == 0) {
 611        return 3;
 612    } else if (kvm_create_device(kvm_state,
 613                                 KVM_DEV_TYPE_ARM_VGIC_V2, true) == 0) {
 614        return 2;
 615    } else {
 616        return 0;
 617    }
 618}
 619
 620int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
 621                             uint64_t address, uint32_t data, PCIDevice *dev)
 622{
 623    return 0;
 624}
 625
 626int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
 627                                int vector, PCIDevice *dev)
 628{
 629    return 0;
 630}
 631
 /* Hook run after a virq is released: the ARM implementation does
  * nothing and always returns 0.
  */
 int kvm_arch_release_virq_post(int virq)
 {
     const int result = 0;

     return result;
 }
 636
 /* Convert MSI @data to a GSI number: subtract 32 (with unsigned
  * wrap-around) and keep the low 16 bits of the result.
  * NOTE(review): the 32 presumably skips the non-shared interrupt IDs
  * at the bottom of the GIC number space -- confirm.
  */
 int kvm_arch_msi_data_to_gsi(uint32_t data)
 {
     uint32_t shifted = data - 32;

     return shifted & 0xffff;
 }
 641