qemu/kvm-all.c
/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <fcntl.h>      /* open(), O_RDWR */
#include <unistd.h>     /* close() */
#include <string.h>     /* memcpy(), strerror() */
#include <errno.h>      /* errno, EINVAL, ... */
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

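/*
 * A KVMSlot mirrors one kernel memory slot: a contiguous guest-physical
 * range of memory_size bytes starting at start_addr, backed by host memory
 * at phys_ram_base + phys_offset.  'slot' is the kernel slot number and
 * 'flags' holds KVM_MEM_* bits such as KVM_MEM_LOG_DIRTY_PAGES.
 */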
typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

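/* Non-zero when KVM acceleration has been requested; normally tested
 * through the kvm_enabled() macro (see kvm.h) rather than directly. */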
int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    return NULL;
}

static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr >= mem->start_addr &&
            start_addr < (mem->start_addr + mem->memory_size))
            return mem;
    }

    return NULL;
}

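/* Push one KVMSlot to the kernel.  Note that KVM_SET_USER_MEMORY_REGION
 * with memory_size == 0 deletes the slot, which is how slots get
 * unregistered below. */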
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)phys_ram_base + slot->phys_offset;
    mem.flags = slot->flags;

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

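/* Create a vcpu for 'env' and map its shared state area.  KVM_CREATE_VCPU
 * returns a new file descriptor, and the kernel/user communication block
 * (struct kvm_run) is obtained by mmap()ing that descriptor. */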
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        /* propagate the error; 'ret' still holds the vcpu fd here */
        ret = mmap_size;
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

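/* Write back QEMU's copy of every vcpu's register state into the kernel
 * before the guest is resumed. */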
int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      target_phys_addr_t end_addr,
                                      unsigned flags,
                                      unsigned mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_slot(s, phys_addr);

    if (mem == NULL) {
        dprintf("invalid parameters %llx-%llx\n", phys_addr, end_addr);
        return -EINVAL;
    }

    flags = (mem->flags & ~mask) | flags;
    /* Nothing changed, no need to issue ioctl */
    if (flags == mem->flags)
        return 0;

    mem->flags = flags;

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    return kvm_dirty_pages_log_change(phys_addr, end_addr,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t end_addr)
{
    return kvm_dirty_pages_log_change(phys_addr, end_addr,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty():
 * every page the kernel reported as dirty is marked dirty on the QEMU side.
 *
 * @start_addr: start of logged region.  This is what we use to search the memslot
 * @end_addr: end of logged region.
 */
void kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    KVMDirtyLog d;
    KVMSlot *mem = kvm_lookup_slot(s, start_addr);
    unsigned long alloc_size;
    ram_addr_t addr;
    target_phys_addr_t phys_addr;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters\n", __func__);
        return;
    }

    dprintf("sync addr: %llx into %lx\n", start_addr, mem->phys_offset);

    /* One bit per target page, rounded up to host-long granularity since
     * the kernel fills the bitmap in long-sized chunks.  (The previous
     * expression relied on '/' binding tighter than '>>' and over-allocated.) */
    alloc_size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
    alloc_size = (alloc_size + sizeof(long) - 1) & ~(sizeof(long) - 1);
    d.dirty_bitmap = qemu_mallocz(alloc_size);

    d.slot = mem->slot;
    dprintf("slot %d, phys_addr %llx, uaddr: %llx\n",
            d.slot, mem->start_addr, mem->phys_offset);

    if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
        dprintf("ioctl failed %d\n", errno);
        goto out;
    }

    phys_addr = start_addr;
    for (addr = mem->phys_offset; phys_addr < end_addr;
         phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
        unsigned nr = (phys_addr - start_addr) >> TARGET_PAGE_BITS;
        unsigned word = nr / (sizeof(*bitmap) * 8);
        unsigned bit = nr % (sizeof(*bitmap) * 8);

        if ((bitmap[word] >> bit) & 1)
            cpu_physical_memory_set_dirty(addr);
    }
out:
    qemu_free(d.dirty_bitmap);
}

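/* Coalesced MMIO: writes to registered zones are buffered by the kernel in
 * a ring shared through the vcpu mmap area instead of exiting to userspace
 * on every access; kvm_run_coalesced_mmio() below drains that ring. */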
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_init(int smp_cpus)
{
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1)
        return -EINVAL;

    s = qemu_mallocz(sizeof(KVMState));

    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        /* save errno before fprintf() can clobber it */
        ret = -errno;
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        ret = s->vmfd;
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use phys_ram_base
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
    if (ret <= 0) {
        if (ret == 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n");
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without this capability. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION,
                    KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
    if (ret <= 0) {
        if (ret == 0)
            ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION)\n"
                "Please upgrade to at least kvm-81.\n");
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
    if (ret > 0)
        s->coalesced_mmio = ret;
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

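/* Replay a KVM_EXIT_IO exit through QEMU's port I/O handlers.  The data to
 * transfer lives inside the mmap'ed kvm_run area: 'count' accesses of
 * 'size' bytes each (a rep string instruction arrives here as count > 1). */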
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(env, port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(env, port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(env, port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(env, port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(env, port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(env, port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

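/* Drain the coalesced MMIO ring, replaying each buffered write as an
 * ordinary physical memory access.  The ring sits 'coalesced_mmio' pages
 * into the vcpu's kvm_run mapping; that page offset is the value the
 * KVM_CAP_COALESCED_MMIO capability check returned. */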
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

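/* Main vcpu execution loop: the exit handlers below leave ret > 0 to keep
 * running in KVM and ret == 0 to return control to the rest of QEMU
 * (for instance when an exit was requested or the I/O window opened). */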
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        kvm_arch_pre_run(env, run);

        if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) {
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

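/* Register, resize or delete the kernel memory slot backing a guest
 * physical range.  Three cases: the range is being unassigned (delete the
 * slot), it carves a piece out of an existing slot (unregister the whole
 * slot, then re-register prefix, middle and suffix separately), or it is
 * brand new (allocate a fresh slot). */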
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem;

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    mem = kvm_lookup_slot(s, start_addr);
    if (mem) {
        if ((flags == IO_MEM_UNASSIGNED) || (flags >= TLB_MMIO)) {
            mem->memory_size = 0;
            mem->start_addr = start_addr;
            mem->phys_offset = 0;
            mem->flags = 0;

            kvm_set_user_memory_region(s, mem);
        } else if (start_addr >= mem->start_addr &&
                   (start_addr + size) <= (mem->start_addr +
                                           mem->memory_size)) {
            KVMSlot slot;
            target_phys_addr_t mem_start;
            ram_addr_t mem_size, mem_offset;

            /* Not splitting */
            if ((phys_offset - (start_addr - mem->start_addr)) ==
                mem->phys_offset)
                return;

            /* unregister whole slot */
            memcpy(&slot, mem, sizeof(slot));
            mem->memory_size = 0;
            kvm_set_user_memory_region(s, mem);

            /* register prefix slot */
            mem_start = slot.start_addr;
            mem_size = start_addr - slot.start_addr;
            mem_offset = slot.phys_offset;
            if (mem_size)
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);

            /* register new slot */
            kvm_set_phys_mem(start_addr, size, phys_offset);

            /* register suffix slot */
            mem_start = start_addr + size;
            mem_offset += mem_size + size;
            mem_size = slot.memory_size - mem_size - size;
            if (mem_size)
                kvm_set_phys_mem(mem_start, mem_size, mem_offset);

            return;
        } else {
            fprintf(stderr, "Registering overlapping slot\n");
            abort();
        }
    }
    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    if (mem == NULL) {
        fprintf(stderr, "%s: out of memory slots\n", __func__);
        abort();
    }

    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    kvm_set_user_memory_region(s, mem);
    /* FIXME deal with errors */
}

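/* Thin ioctl wrappers for the three KVM fd levels (system, VM, vcpu).
 * Each takes at most one argument and converts the -1/errno convention
 * into a negative errno return value. */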
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

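/* KVM_CAP_SYNC_MMU indicates the kernel keeps guest mappings in sync with
 * changes to the host userspace address space (via mmu notifiers), so
 * remapping guest RAM on the host side is safe. */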
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    if (kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU) > 0)
        return 1;
#endif

    return 0;
}