linux/tools/testing/selftests/kvm/lib/kvm_util.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * tools/testing/selftests/kvm/lib/kvm_util.c
   4 *
   5 * Copyright (C) 2018, Google LLC.
   6 */
   7
   8#define _GNU_SOURCE /* for program_invocation_name */
   9#include "test_util.h"
  10#include "kvm_util.h"
  11#include "kvm_util_internal.h"
  12#include "processor.h"
  13
  14#include <assert.h>
  15#include <sys/mman.h>
  16#include <sys/types.h>
  17#include <sys/stat.h>
  18#include <unistd.h>
  19#include <linux/kernel.h>
  20
  21#define KVM_UTIL_MIN_PFN        2
  22
  23static int vcpu_mmap_sz(void);
  24
  25/* Aligns x up to the next multiple of size. Size must be a power of 2. */
  26static void *align(void *x, size_t size)
  27{
  28        size_t mask = size - 1;
  29        TEST_ASSERT(size != 0 && !(size & (size - 1)),
  30                    "size not a power of 2: %lu", size);
  31        return (void *) (((size_t) x + mask) & ~mask);
  32}
  33
  34/*
  35 * Open KVM_DEV_PATH if available, otherwise exit the entire program.
  36 *
  37 * Input Args:
  38 *   flags - The flags to pass when opening KVM_DEV_PATH.
  39 *
  40 * Return:
  41 *   The opened file descriptor of /dev/kvm.
  42 */
  43static int _open_kvm_dev_path_or_exit(int flags)
  44{
  45        int fd;
  46
  47        fd = open(KVM_DEV_PATH, flags);
  48        if (fd < 0) {
  49                print_skip("%s not available, is KVM loaded? (errno: %d)",
  50                           KVM_DEV_PATH, errno);
  51                exit(KSFT_SKIP);
  52        }
  53
  54        return fd;
  55}
  56
  57int open_kvm_dev_path_or_exit(void)
  58{
  59        return _open_kvm_dev_path_or_exit(O_RDONLY);
  60}
  61
  62/*
  63 * Capability
  64 *
  65 * Input Args:
  66 *   cap - Capability
  67 *
  68 * Output Args: None
  69 *
  70 * Return:
  71 *   On success, the Value corresponding to the capability (KVM_CAP_*)
  72 *   specified by the value of cap.  On failure a TEST_ASSERT failure
  73 *   is produced.
  74 *
  75 * Looks up and returns the value corresponding to the capability
  76 * (KVM_CAP_*) given by cap.
  77 */
  78int kvm_check_cap(long cap)
  79{
  80        int ret;
  81        int kvm_fd;
  82
  83        kvm_fd = open_kvm_dev_path_or_exit();
  84        ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
  85        TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
  86                "  rc: %i errno: %i", ret, errno);
  87
  88        close(kvm_fd);
  89
  90        return ret;
  91}
  92
  93/* VM Enable Capability
  94 *
  95 * Input Args:
  96 *   vm - Virtual Machine
  97 *   cap - Capability
  98 *
  99 * Output Args: None
 100 *
 101 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
 102 *
 103 * Enables a capability (KVM_CAP_*) on the VM.
 104 */
 105int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
 106{
 107        int ret;
 108
 109        ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
 110        TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
 111                "  rc: %i errno: %i", ret, errno);
 112
 113        return ret;
 114}
 115
 116/* VCPU Enable Capability
 117 *
 118 * Input Args:
 119 *   vm - Virtual Machine
 120 *   vcpu_id - VCPU
 121 *   cap - Capability
 122 *
 123 * Output Args: None
 124 *
 125 * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
 126 *
 127 * Enables a capability (KVM_CAP_*) on the VCPU.
 128 */
 129int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
 130                    struct kvm_enable_cap *cap)
 131{
 132        struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
 133        int r;
 134
 135        TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
 136
 137        r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
 138        TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
 139                        "  rc: %i, errno: %i", r, errno);
 140
 141        return r;
 142}
 143
 144void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
 145{
 146        struct kvm_enable_cap cap = { 0 };
 147
 148        cap.cap = KVM_CAP_DIRTY_LOG_RING;
 149        cap.args[0] = ring_size;
 150        vm_enable_cap(vm, &cap);
 151        vm->dirty_ring_size = ring_size;
 152}
 153
 154static void vm_open(struct kvm_vm *vm, int perm)
 155{
 156        vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
 157
 158        if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
 159                print_skip("immediate_exit not available");
 160                exit(KSFT_SKIP);
 161        }
 162
 163        vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
 164        TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
 165                "rc: %i errno: %i", vm->fd, errno);
 166}
 167
 168const char *vm_guest_mode_string(uint32_t i)
 169{
 170        static const char * const strings[] = {
 171                [VM_MODE_P52V48_4K]     = "PA-bits:52,  VA-bits:48,  4K pages",
 172                [VM_MODE_P52V48_64K]    = "PA-bits:52,  VA-bits:48, 64K pages",
 173                [VM_MODE_P48V48_4K]     = "PA-bits:48,  VA-bits:48,  4K pages",
 174                [VM_MODE_P48V48_64K]    = "PA-bits:48,  VA-bits:48, 64K pages",
 175                [VM_MODE_P40V48_4K]     = "PA-bits:40,  VA-bits:48,  4K pages",
 176                [VM_MODE_P40V48_64K]    = "PA-bits:40,  VA-bits:48, 64K pages",
 177                [VM_MODE_PXXV48_4K]     = "PA-bits:ANY, VA-bits:48,  4K pages",
 178                [VM_MODE_P47V64_4K]     = "PA-bits:47,  VA-bits:64,  4K pages",
 179                [VM_MODE_P44V64_4K]     = "PA-bits:44,  VA-bits:64,  4K pages",
 180        };
 181        _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
 182                       "Missing new mode strings?");
 183
 184        TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
 185
 186        return strings[i];
 187}
 188
/*
 * Per guest mode parameters, indexed by guest mode.  vm_create() reads the
 * pa_bits, va_bits, page_size and page_shift members of the selected entry.
 *
 * NOTE(review): the positional initializers presumably follow that member
 * order and the rows presumably follow the enum vm_guest_mode order (the
 * values line up with the VM_MODE_P<pa>V<va>_<page> names) - confirm
 * against the struct and enum declarations.
 */
const struct vm_guest_mode_params vm_guest_mode_params[] = {
        { 52, 48,  0x1000, 12 },
        { 52, 48, 0x10000, 16 },
        { 48, 48,  0x1000, 12 },
        { 48, 48, 0x10000, 16 },
        { 40, 48,  0x1000, 12 },
        { 40, 48, 0x10000, 16 },
        /* Zero address widths here; vm_create() fills them in for VM_MODE_PXXV48_4K on x86_64. */
        {  0,  0,  0x1000, 12 },
        { 47, 64,  0x1000, 12 },
        { 44, 64,  0x1000, 12 },
};
/* Break the build when a mode is added without a matching table entry. */
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
               "Missing new mode params?");
 202
 203/*
 204 * VM Create
 205 *
 206 * Input Args:
 207 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 208 *   phy_pages - Physical memory pages
 209 *   perm - permission
 210 *
 211 * Output Args: None
 212 *
 213 * Return:
 214 *   Pointer to opaque structure that describes the created VM.
 215 *
 216 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
 217 * When phy_pages is non-zero, a memory region of phy_pages physical pages
 218 * is created and mapped starting at guest physical address 0.  The file
 219 * descriptor to control the created VM is created with the permissions
 220 * given by perm (e.g. O_RDWR).
 221 */
 222struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 223{
 224        struct kvm_vm *vm;
 225
 226        pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
 227                 vm_guest_mode_string(mode), phy_pages, perm);
 228
 229        vm = calloc(1, sizeof(*vm));
 230        TEST_ASSERT(vm != NULL, "Insufficient Memory");
 231
 232        INIT_LIST_HEAD(&vm->vcpus);
 233        vm->regions.gpa_tree = RB_ROOT;
 234        vm->regions.hva_tree = RB_ROOT;
 235        hash_init(vm->regions.slot_hash);
 236
 237        vm->mode = mode;
 238        vm->type = 0;
 239
 240        vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
 241        vm->va_bits = vm_guest_mode_params[mode].va_bits;
 242        vm->page_size = vm_guest_mode_params[mode].page_size;
 243        vm->page_shift = vm_guest_mode_params[mode].page_shift;
 244
 245        /* Setup mode specific traits. */
 246        switch (vm->mode) {
 247        case VM_MODE_P52V48_4K:
 248                vm->pgtable_levels = 4;
 249                break;
 250        case VM_MODE_P52V48_64K:
 251                vm->pgtable_levels = 3;
 252                break;
 253        case VM_MODE_P48V48_4K:
 254                vm->pgtable_levels = 4;
 255                break;
 256        case VM_MODE_P48V48_64K:
 257                vm->pgtable_levels = 3;
 258                break;
 259        case VM_MODE_P40V48_4K:
 260                vm->pgtable_levels = 4;
 261                break;
 262        case VM_MODE_P40V48_64K:
 263                vm->pgtable_levels = 3;
 264                break;
 265        case VM_MODE_PXXV48_4K:
 266#ifdef __x86_64__
 267                kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
 268                /*
 269                 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
 270                 * it doesn't take effect unless a CR4.LA57 is set, which it
 271                 * isn't for this VM_MODE.
 272                 */
 273                TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
 274                            "Linear address width (%d bits) not supported",
 275                            vm->va_bits);
 276                pr_debug("Guest physical address width detected: %d\n",
 277                         vm->pa_bits);
 278                vm->pgtable_levels = 4;
 279                vm->va_bits = 48;
 280#else
 281                TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
 282#endif
 283                break;
 284        case VM_MODE_P47V64_4K:
 285                vm->pgtable_levels = 5;
 286                break;
 287        case VM_MODE_P44V64_4K:
 288                vm->pgtable_levels = 5;
 289                break;
 290        default:
 291                TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
 292        }
 293
 294#ifdef __aarch64__
 295        if (vm->pa_bits != 40)
 296                vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
 297#endif
 298
 299        vm_open(vm, perm);
 300
 301        /* Limit to VA-bit canonical virtual addresses. */
 302        vm->vpages_valid = sparsebit_alloc();
 303        sparsebit_set_num(vm->vpages_valid,
 304                0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
 305        sparsebit_set_num(vm->vpages_valid,
 306                (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
 307                (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
 308
 309        /* Limit physical addresses to PA-bits. */
 310        vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
 311
 312        /* Allocate and setup memory for guest. */
 313        vm->vpages_mapped = sparsebit_alloc();
 314        if (phy_pages != 0)
 315                vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 316                                            0, 0, phy_pages, 0);
 317
 318        return vm;
 319}
 320
 321/*
 322 * VM Create with customized parameters
 323 *
 324 * Input Args:
 325 *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
 326 *   nr_vcpus - VCPU count
 327 *   slot0_mem_pages - Slot0 physical memory size
 328 *   extra_mem_pages - Non-slot0 physical memory total size
 329 *   num_percpu_pages - Per-cpu physical memory pages
 330 *   guest_code - Guest entry point
 331 *   vcpuids - VCPU IDs
 332 *
 333 * Output Args: None
 334 *
 335 * Return:
 336 *   Pointer to opaque structure that describes the created VM.
 337 *
 338 * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
 339 * with customized slot0 memory size, at least 512 pages currently.
 340 * extra_mem_pages is only used to calculate the maximum page table size,
 341 * no real memory allocation for non-slot0 memory in this function.
 342 */
 343struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 344                                    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
 345                                    uint32_t num_percpu_pages, void *guest_code,
 346                                    uint32_t vcpuids[])
 347{
 348        uint64_t vcpu_pages, extra_pg_pages, pages;
 349        struct kvm_vm *vm;
 350        int i;
 351
 352        /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
 353        if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
 354                slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
 355
 356        /* The maximum page table size for a memory region will be when the
 357         * smallest pages are used. Considering each page contains x page
 358         * table descriptors, the total extra size for page tables (for extra
 359         * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
 360         * than N/x*2.
 361         */
 362        vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
 363        extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
 364        pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
 365
 366        TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
 367                    "nr_vcpus = %d too large for host, max-vcpus = %d",
 368                    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
 369
 370        pages = vm_adjust_num_guest_pages(mode, pages);
 371        vm = vm_create(mode, pages, O_RDWR);
 372
 373        kvm_vm_elf_load(vm, program_invocation_name);
 374
 375#ifdef __x86_64__
 376        vm_create_irqchip(vm);
 377#endif
 378
 379        for (i = 0; i < nr_vcpus; ++i) {
 380                uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
 381
 382                vm_vcpu_add_default(vm, vcpuid, guest_code);
 383        }
 384
 385        return vm;
 386}
 387
 388struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
 389                                            uint32_t num_percpu_pages, void *guest_code,
 390                                            uint32_t vcpuids[])
 391{
 392        return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
 393                                    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
 394}
 395
 396struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 397                                 void *guest_code)
 398{
 399        return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
 400                                            (uint32_t []){ vcpuid });
 401}
 402
 403/*
 404 * VM Restart
 405 *
 406 * Input Args:
 407 *   vm - VM that has been released before
 408 *   perm - permission
 409 *
 410 * Output Args: None
 411 *
 412 * Reopens the file descriptors associated to the VM and reinstates the
 413 * global state, such as the irqchip and the memory regions that are mapped
 414 * into the guest.
 415 */
 416void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 417{
 418        int ctr;
 419        struct userspace_mem_region *region;
 420
 421        vm_open(vmp, perm);
 422        if (vmp->has_irqchip)
 423                vm_create_irqchip(vmp);
 424
 425        hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
 426                int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
 427                TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
 428                            "  rc: %i errno: %i\n"
 429                            "  slot: %u flags: 0x%x\n"
 430                            "  guest_phys_addr: 0x%llx size: 0x%llx",
 431                            ret, errno, region->region.slot,
 432                            region->region.flags,
 433                            region->region.guest_phys_addr,
 434                            region->region.memory_size);
 435        }
 436}
 437
 438void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
 439{
 440        struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
 441        int ret;
 442
 443        ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
 444        TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
 445                    __func__, strerror(-ret));
 446}
 447
 448void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
 449                            uint64_t first_page, uint32_t num_pages)
 450{
 451        struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
 452                                            .first_page = first_page,
 453                                            .num_pages = num_pages };
 454        int ret;
 455
 456        ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
 457        TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
 458                    __func__, strerror(-ret));
 459}
 460
 461uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
 462{
 463        return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
 464}
 465
 466/*
 467 * Userspace Memory Region Find
 468 *
 469 * Input Args:
 470 *   vm - Virtual Machine
 471 *   start - Starting VM physical address
 472 *   end - Ending VM physical address, inclusive.
 473 *
 474 * Output Args: None
 475 *
 476 * Return:
 477 *   Pointer to overlapping region, NULL if no such region.
 478 *
 479 * Searches for a region with any physical memory that overlaps with
 480 * any portion of the guest physical addresses from start to end
 481 * inclusive.  If multiple overlapping regions exist, a pointer to any
 482 * of the regions is returned.  Null is returned only when no overlapping
 483 * region exists.
 484 */
 485static struct userspace_mem_region *
 486userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 487{
 488        struct rb_node *node;
 489
 490        for (node = vm->regions.gpa_tree.rb_node; node; ) {
 491                struct userspace_mem_region *region =
 492                        container_of(node, struct userspace_mem_region, gpa_node);
 493                uint64_t existing_start = region->region.guest_phys_addr;
 494                uint64_t existing_end = region->region.guest_phys_addr
 495                        + region->region.memory_size - 1;
 496                if (start <= existing_end && end >= existing_start)
 497                        return region;
 498
 499                if (start < existing_start)
 500                        node = node->rb_left;
 501                else
 502                        node = node->rb_right;
 503        }
 504
 505        return NULL;
 506}
 507
 508/*
 509 * KVM Userspace Memory Region Find
 510 *
 511 * Input Args:
 512 *   vm - Virtual Machine
 513 *   start - Starting VM physical address
 514 *   end - Ending VM physical address, inclusive.
 515 *
 516 * Output Args: None
 517 *
 518 * Return:
 519 *   Pointer to overlapping region, NULL if no such region.
 520 *
 521 * Public interface to userspace_mem_region_find. Allows tests to look up
 522 * the memslot datastructure for a given range of guest physical memory.
 523 */
 524struct kvm_userspace_memory_region *
 525kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 526                                 uint64_t end)
 527{
 528        struct userspace_mem_region *region;
 529
 530        region = userspace_mem_region_find(vm, start, end);
 531        if (!region)
 532                return NULL;
 533
 534        return &region->region;
 535}
 536
 537/*
 538 * VCPU Find
 539 *
 540 * Input Args:
 541 *   vm - Virtual Machine
 542 *   vcpuid - VCPU ID
 543 *
 544 * Output Args: None
 545 *
 546 * Return:
 547 *   Pointer to VCPU structure
 548 *
 549 * Locates a vcpu structure that describes the VCPU specified by vcpuid and
 550 * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
 551 * for the specified vcpuid.
 552 */
 553struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
 554{
 555        struct vcpu *vcpu;
 556
 557        list_for_each_entry(vcpu, &vm->vcpus, list) {
 558                if (vcpu->id == vcpuid)
 559                        return vcpu;
 560        }
 561
 562        return NULL;
 563}
 564
 565/*
 566 * VM VCPU Remove
 567 *
 568 * Input Args:
 569 *   vcpu - VCPU to remove
 570 *
 571 * Output Args: None
 572 *
 573 * Return: None, TEST_ASSERT failures for all error conditions
 574 *
 575 * Removes a vCPU from a VM and frees its resources.
 576 */
 577static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
 578{
 579        int ret;
 580
 581        if (vcpu->dirty_gfns) {
 582                ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
 583                TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
 584                            "rc: %i errno: %i", ret, errno);
 585                vcpu->dirty_gfns = NULL;
 586        }
 587
 588        ret = munmap(vcpu->state, vcpu_mmap_sz());
 589        TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
 590                "errno: %i", ret, errno);
 591        ret = close(vcpu->fd);
 592        TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
 593                "errno: %i", ret, errno);
 594
 595        list_del(&vcpu->list);
 596        free(vcpu);
 597}
 598
 599void kvm_vm_release(struct kvm_vm *vmp)
 600{
 601        struct vcpu *vcpu, *tmp;
 602        int ret;
 603
 604        list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
 605                vm_vcpu_rm(vmp, vcpu);
 606
 607        ret = close(vmp->fd);
 608        TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
 609                "  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
 610
 611        ret = close(vmp->kvm_fd);
 612        TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
 613                "  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
 614}
 615
 616static void __vm_mem_region_delete(struct kvm_vm *vm,
 617                                   struct userspace_mem_region *region,
 618                                   bool unlink)
 619{
 620        int ret;
 621
 622        if (unlink) {
 623                rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
 624                rb_erase(&region->hva_node, &vm->regions.hva_tree);
 625                hash_del(&region->slot_node);
 626        }
 627
 628        region->region.memory_size = 0;
 629        ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
 630        TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
 631                    "rc: %i errno: %i", ret, errno);
 632
 633        sparsebit_free(&region->unused_phy_pages);
 634        ret = munmap(region->mmap_start, region->mmap_size);
 635        TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
 636
 637        free(region);
 638}
 639
 640/*
 641 * Destroys and frees the VM pointed to by vmp.
 642 */
 643void kvm_vm_free(struct kvm_vm *vmp)
 644{
 645        int ctr;
 646        struct hlist_node *node;
 647        struct userspace_mem_region *region;
 648
 649        if (vmp == NULL)
 650                return;
 651
 652        /* Free userspace_mem_regions. */
 653        hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
 654                __vm_mem_region_delete(vmp, region, false);
 655
 656        /* Free sparsebit arrays. */
 657        sparsebit_free(&vmp->vpages_valid);
 658        sparsebit_free(&vmp->vpages_mapped);
 659
 660        kvm_vm_release(vmp);
 661
 662        /* Free the structure describing the VM. */
 663        free(vmp);
 664}
 665
 666/*
 667 * Memory Compare, host virtual to guest virtual
 668 *
 669 * Input Args:
 670 *   hva - Starting host virtual address
 671 *   vm - Virtual Machine
 672 *   gva - Starting guest virtual address
 673 *   len - number of bytes to compare
 674 *
 675 * Output Args: None
 676 *
 677 * Input/Output Args: None
 678 *
 679 * Return:
 680 *   Returns 0 if the bytes starting at hva for a length of len
 681 *   are equal the guest virtual bytes starting at gva.  Returns
 682 *   a value < 0, if bytes at hva are less than those at gva.
 683 *   Otherwise a value > 0 is returned.
 684 *
 685 * Compares the bytes starting at the host virtual address hva, for
 686 * a length of len, to the guest bytes starting at the guest virtual
 687 * address given by gva.
 688 */
 689int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
 690{
 691        size_t amt;
 692
 693        /*
 694         * Compare a batch of bytes until either a match is found
 695         * or all the bytes have been compared.
 696         */
 697        for (uintptr_t offset = 0; offset < len; offset += amt) {
 698                uintptr_t ptr1 = (uintptr_t)hva + offset;
 699
 700                /*
 701                 * Determine host address for guest virtual address
 702                 * at offset.
 703                 */
 704                uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
 705
 706                /*
 707                 * Determine amount to compare on this pass.
 708                 * Don't allow the comparsion to cross a page boundary.
 709                 */
 710                amt = len - offset;
 711                if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
 712                        amt = vm->page_size - (ptr1 % vm->page_size);
 713                if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
 714                        amt = vm->page_size - (ptr2 % vm->page_size);
 715
 716                assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
 717                assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
 718
 719                /*
 720                 * Perform the comparison.  If there is a difference
 721                 * return that result to the caller, otherwise need
 722                 * to continue on looking for a mismatch.
 723                 */
 724                int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
 725                if (ret != 0)
 726                        return ret;
 727        }
 728
 729        /*
 730         * No mismatch found.  Let the caller know the two memory
 731         * areas are equal.
 732         */
 733        return 0;
 734}
 735
 736static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
 737                                               struct userspace_mem_region *region)
 738{
 739        struct rb_node **cur, *parent;
 740
 741        for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
 742                struct userspace_mem_region *cregion;
 743
 744                cregion = container_of(*cur, typeof(*cregion), gpa_node);
 745                parent = *cur;
 746                if (region->region.guest_phys_addr <
 747                    cregion->region.guest_phys_addr)
 748                        cur = &(*cur)->rb_left;
 749                else {
 750                        TEST_ASSERT(region->region.guest_phys_addr !=
 751                                    cregion->region.guest_phys_addr,
 752                                    "Duplicate GPA in region tree");
 753
 754                        cur = &(*cur)->rb_right;
 755                }
 756        }
 757
 758        rb_link_node(&region->gpa_node, parent, cur);
 759        rb_insert_color(&region->gpa_node, gpa_tree);
 760}
 761
 762static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
 763                                               struct userspace_mem_region *region)
 764{
 765        struct rb_node **cur, *parent;
 766
 767        for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
 768                struct userspace_mem_region *cregion;
 769
 770                cregion = container_of(*cur, typeof(*cregion), hva_node);
 771                parent = *cur;
 772                if (region->host_mem < cregion->host_mem)
 773                        cur = &(*cur)->rb_left;
 774                else {
 775                        TEST_ASSERT(region->host_mem !=
 776                                    cregion->host_mem,
 777                                    "Duplicate HVA in region tree");
 778
 779                        cur = &(*cur)->rb_right;
 780                }
 781        }
 782
 783        rb_link_node(&region->hva_node, parent, cur);
 784        rb_insert_color(&region->hva_node, hva_tree);
 785}
 786
 787/*
 788 * VM Userspace Memory Region Add
 789 *
 790 * Input Args:
 791 *   vm - Virtual Machine
 792 *   src_type - Storage source for this region.
 793 *              NULL to use anonymous memory.
 794 *   guest_paddr - Starting guest physical address
 795 *   slot - KVM region slot
 796 *   npages - Number of physical pages
 797 *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
 798 *
 799 * Output Args: None
 800 *
 801 * Return: None
 802 *
 803 * Allocates a memory area of the number of pages specified by npages
 804 * and maps it to the VM specified by vm, at a starting physical address
 805 * given by guest_paddr.  The region is created with a KVM region slot
 806 * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
 807 * region is created with the flags given by flags.
 808 */
 809void vm_userspace_mem_region_add(struct kvm_vm *vm,
 810        enum vm_mem_backing_src_type src_type,
 811        uint64_t guest_paddr, uint32_t slot, uint64_t npages,
 812        uint32_t flags)
 813{
 814        int ret;
 815        struct userspace_mem_region *region;
 816        size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
 817        size_t alignment;
 818
 819        TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
 820                "Number of guest pages is not compatible with the host. "
 821                "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
 822
 823        TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
 824                "address not on a page boundary.\n"
 825                "  guest_paddr: 0x%lx vm->page_size: 0x%x",
 826                guest_paddr, vm->page_size);
 827        TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
 828                <= vm->max_gfn, "Physical range beyond maximum "
 829                "supported physical address,\n"
 830                "  guest_paddr: 0x%lx npages: 0x%lx\n"
 831                "  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 832                guest_paddr, npages, vm->max_gfn, vm->page_size);
 833
 834        /*
 835         * Confirm a mem region with an overlapping address doesn't
 836         * already exist.
 837         */
 838        region = (struct userspace_mem_region *) userspace_mem_region_find(
 839                vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
 840        if (region != NULL)
 841                TEST_FAIL("overlapping userspace_mem_region already "
 842                        "exists\n"
 843                        "  requested guest_paddr: 0x%lx npages: 0x%lx "
 844                        "page_size: 0x%x\n"
 845                        "  existing guest_paddr: 0x%lx size: 0x%lx",
 846                        guest_paddr, npages, vm->page_size,
 847                        (uint64_t) region->region.guest_phys_addr,
 848                        (uint64_t) region->region.memory_size);
 849
 850        /* Confirm no region with the requested slot already exists. */
 851        hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
 852                               slot) {
 853                if (region->region.slot != slot)
 854                        continue;
 855
 856                TEST_FAIL("A mem region with the requested slot "
 857                        "already exists.\n"
 858                        "  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
 859                        "  existing slot: %u paddr: 0x%lx size: 0x%lx",
 860                        slot, guest_paddr, npages,
 861                        region->region.slot,
 862                        (uint64_t) region->region.guest_phys_addr,
 863                        (uint64_t) region->region.memory_size);
 864        }
 865
 866        /* Allocate and initialize new mem region structure. */
 867        region = calloc(1, sizeof(*region));
 868        TEST_ASSERT(region != NULL, "Insufficient Memory");
 869        region->mmap_size = npages * vm->page_size;
 870
 871#ifdef __s390x__
 872        /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
 873        alignment = 0x100000;
 874#else
 875        alignment = 1;
 876#endif
 877
 878        if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
 879                alignment = max(backing_src_pagesz, alignment);
 880
 881        /* Add enough memory to align up if necessary */
 882        if (alignment > 1)
 883                region->mmap_size += alignment;
 884
 885        region->fd = -1;
 886        if (backing_src_is_shared(src_type)) {
 887                int memfd_flags = MFD_CLOEXEC;
 888
 889                if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
 890                        memfd_flags |= MFD_HUGETLB;
 891
 892                region->fd = memfd_create("kvm_selftest", memfd_flags);
 893                TEST_ASSERT(region->fd != -1,
 894                            "memfd_create failed, errno: %i", errno);
 895
 896                ret = ftruncate(region->fd, region->mmap_size);
 897                TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
 898
 899                ret = fallocate(region->fd,
 900                                FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
 901                                region->mmap_size);
 902                TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
 903        }
 904
 905        region->mmap_start = mmap(NULL, region->mmap_size,
 906                                  PROT_READ | PROT_WRITE,
 907                                  vm_mem_backing_src_alias(src_type)->flag,
 908                                  region->fd, 0);
 909        TEST_ASSERT(region->mmap_start != MAP_FAILED,
 910                    "test_malloc failed, mmap_start: %p errno: %i",
 911                    region->mmap_start, errno);
 912
 913        /* Align host address */
 914        region->host_mem = align(region->mmap_start, alignment);
 915
 916        /* As needed perform madvise */
 917        if ((src_type == VM_MEM_SRC_ANONYMOUS ||
 918             src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
 919                ret = madvise(region->host_mem, npages * vm->page_size,
 920                              src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
 921                TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
 922                            region->host_mem, npages * vm->page_size,
 923                            vm_mem_backing_src_alias(src_type)->name);
 924        }
 925
 926        region->unused_phy_pages = sparsebit_alloc();
 927        sparsebit_set_num(region->unused_phy_pages,
 928                guest_paddr >> vm->page_shift, npages);
 929        region->region.slot = slot;
 930        region->region.flags = flags;
 931        region->region.guest_phys_addr = guest_paddr;
 932        region->region.memory_size = npages * vm->page_size;
 933        region->region.userspace_addr = (uintptr_t) region->host_mem;
 934        ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
 935        TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
 936                "  rc: %i errno: %i\n"
 937                "  slot: %u flags: 0x%x\n"
 938                "  guest_phys_addr: 0x%lx size: 0x%lx",
 939                ret, errno, slot, flags,
 940                guest_paddr, (uint64_t) region->region.memory_size);
 941
 942        /* Add to quick lookup data structures */
 943        vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
 944        vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
 945        hash_add(vm->regions.slot_hash, &region->slot_node, slot);
 946
 947        /* If shared memory, create an alias. */
 948        if (region->fd >= 0) {
 949                region->mmap_alias = mmap(NULL, region->mmap_size,
 950                                          PROT_READ | PROT_WRITE,
 951                                          vm_mem_backing_src_alias(src_type)->flag,
 952                                          region->fd, 0);
 953                TEST_ASSERT(region->mmap_alias != MAP_FAILED,
 954                            "mmap of alias failed, errno: %i", errno);
 955
 956                /* Align host alias address */
 957                region->host_alias = align(region->mmap_alias, alignment);
 958        }
 959}
 960
 961/*
 962 * Memslot to region
 963 *
 964 * Input Args:
 965 *   vm - Virtual Machine
 966 *   memslot - KVM memory slot ID
 967 *
 968 * Output Args: None
 969 *
 970 * Return:
 971 *   Pointer to memory region structure that describe memory region
 972 *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
 973 *   on error (e.g. currently no memory region using memslot as a KVM
 974 *   memory slot ID).
 975 */
 976struct userspace_mem_region *
 977memslot2region(struct kvm_vm *vm, uint32_t memslot)
 978{
 979        struct userspace_mem_region *region;
 980
 981        hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
 982                               memslot)
 983                if (region->region.slot == memslot)
 984                        return region;
 985
 986        fprintf(stderr, "No mem region with the requested slot found,\n"
 987                "  requested slot: %u\n", memslot);
 988        fputs("---- vm dump ----\n", stderr);
 989        vm_dump(stderr, vm, 2);
 990        TEST_FAIL("Mem region not found");
 991        return NULL;
 992}
 993
 994/*
 995 * VM Memory Region Flags Set
 996 *
 997 * Input Args:
 998 *   vm - Virtual Machine
 999 *   flags - Starting guest physical address
1000 *
1001 * Output Args: None
1002 *
1003 * Return: None
1004 *
1005 * Sets the flags of the memory region specified by the value of slot,
1006 * to the values given by flags.
1007 */
1008void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
1009{
1010        int ret;
1011        struct userspace_mem_region *region;
1012
1013        region = memslot2region(vm, slot);
1014
1015        region->region.flags = flags;
1016
1017        ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
1018
1019        TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
1020                "  rc: %i errno: %i slot: %u flags: 0x%x",
1021                ret, errno, slot, flags);
1022}
1023
1024/*
1025 * VM Memory Region Move
1026 *
1027 * Input Args:
1028 *   vm - Virtual Machine
1029 *   slot - Slot of the memory region to move
1030 *   new_gpa - Starting guest physical address
1031 *
1032 * Output Args: None
1033 *
1034 * Return: None
1035 *
1036 * Change the gpa of a memory region.
1037 */
1038void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
1039{
1040        struct userspace_mem_region *region;
1041        int ret;
1042
1043        region = memslot2region(vm, slot);
1044
1045        region->region.guest_phys_addr = new_gpa;
1046
1047        ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
1048
1049        TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
1050                    "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
1051                    ret, errno, slot, new_gpa);
1052}
1053
/*
 * VM Memory Region Delete
 *
 * Input Args:
 *   vm - Virtual Machine
 *   slot - Slot of the memory region to delete
 *
 * Output Args: None
 *
 * Return: None
 *
 * Resolves slot to its memory region (failing the test if there is
 * none) and hands it to __vm_mem_region_delete().  The meaning of the
 * trailing "true" argument is defined by that helper.
 */
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
	struct userspace_mem_region *doomed = memslot2region(vm, slot);

	__vm_mem_region_delete(vm, doomed, true);
}
1071
1072/*
1073 * VCPU mmap Size
1074 *
1075 * Input Args: None
1076 *
1077 * Output Args: None
1078 *
1079 * Return:
1080 *   Size of VCPU state
1081 *
1082 * Returns the size of the structure pointed to by the return value
1083 * of vcpu_state().
1084 */
1085static int vcpu_mmap_sz(void)
1086{
1087        int dev_fd, ret;
1088
1089        dev_fd = open_kvm_dev_path_or_exit();
1090
1091        ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
1092        TEST_ASSERT(ret >= sizeof(struct kvm_run),
1093                "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
1094                __func__, ret, errno);
1095
1096        close(dev_fd);
1097
1098        return ret;
1099}
1100
1101/*
1102 * VM VCPU Add
1103 *
1104 * Input Args:
1105 *   vm - Virtual Machine
1106 *   vcpuid - VCPU ID
1107 *
1108 * Output Args: None
1109 *
1110 * Return: None
1111 *
1112 * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
1113 * No additional VCPU setup is done.
1114 */
1115void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
1116{
1117        struct vcpu *vcpu;
1118
1119        /* Confirm a vcpu with the specified id doesn't already exist. */
1120        vcpu = vcpu_find(vm, vcpuid);
1121        if (vcpu != NULL)
1122                TEST_FAIL("vcpu with the specified id "
1123                        "already exists,\n"
1124                        "  requested vcpuid: %u\n"
1125                        "  existing vcpuid: %u state: %p",
1126                        vcpuid, vcpu->id, vcpu->state);
1127
1128        /* Allocate and initialize new vcpu structure. */
1129        vcpu = calloc(1, sizeof(*vcpu));
1130        TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
1131        vcpu->id = vcpuid;
1132        vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
1133        TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
1134                vcpu->fd, errno);
1135
1136        TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
1137                "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
1138                vcpu_mmap_sz(), sizeof(*vcpu->state));
1139        vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
1140                PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
1141        TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
1142                "vcpu id: %u errno: %i", vcpuid, errno);
1143
1144        /* Add to linked-list of VCPUs. */
1145        list_add(&vcpu->list, &vm->vcpus);
1146}
1147
1148/*
1149 * VM Virtual Address Unused Gap
1150 *
1151 * Input Args:
1152 *   vm - Virtual Machine
1153 *   sz - Size (bytes)
1154 *   vaddr_min - Minimum Virtual Address
1155 *
1156 * Output Args: None
1157 *
1158 * Return:
1159 *   Lowest virtual address at or below vaddr_min, with at least
1160 *   sz unused bytes.  TEST_ASSERT failure if no area of at least
1161 *   size sz is available.
1162 *
1163 * Within the VM specified by vm, locates the lowest starting virtual
1164 * address >= vaddr_min, that has at least sz unallocated bytes.  A
1165 * TEST_ASSERT failure occurs for invalid input or no area of at least
1166 * sz unallocated bytes >= vaddr_min is available.
1167 */
1168static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
1169                                      vm_vaddr_t vaddr_min)
1170{
1171        uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
1172
1173        /* Determine lowest permitted virtual page index. */
1174        uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
1175        if ((pgidx_start * vm->page_size) < vaddr_min)
1176                goto no_va_found;
1177
1178        /* Loop over section with enough valid virtual page indexes. */
1179        if (!sparsebit_is_set_num(vm->vpages_valid,
1180                pgidx_start, pages))
1181                pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
1182                        pgidx_start, pages);
1183        do {
1184                /*
1185                 * Are there enough unused virtual pages available at
1186                 * the currently proposed starting virtual page index.
1187                 * If not, adjust proposed starting index to next
1188                 * possible.
1189                 */
1190                if (sparsebit_is_clear_num(vm->vpages_mapped,
1191                        pgidx_start, pages))
1192                        goto va_found;
1193                pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
1194                        pgidx_start, pages);
1195                if (pgidx_start == 0)
1196                        goto no_va_found;
1197
1198                /*
1199                 * If needed, adjust proposed starting virtual address,
1200                 * to next range of valid virtual addresses.
1201                 */
1202                if (!sparsebit_is_set_num(vm->vpages_valid,
1203                        pgidx_start, pages)) {
1204                        pgidx_start = sparsebit_next_set_num(
1205                                vm->vpages_valid, pgidx_start, pages);
1206                        if (pgidx_start == 0)
1207                                goto no_va_found;
1208                }
1209        } while (pgidx_start != 0);
1210
1211no_va_found:
1212        TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
1213
1214        /* NOT REACHED */
1215        return -1;
1216
1217va_found:
1218        TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
1219                pgidx_start, pages),
1220                "Unexpected, invalid virtual page index range,\n"
1221                "  pgidx_start: 0x%lx\n"
1222                "  pages: 0x%lx",
1223                pgidx_start, pages);
1224        TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
1225                pgidx_start, pages),
1226                "Unexpected, pages already mapped,\n"
1227                "  pgidx_start: 0x%lx\n"
1228                "  pages: 0x%lx",
1229                pgidx_start, pages);
1230
1231        return pgidx_start * vm->page_size;
1232}
1233
1234/*
1235 * VM Virtual Address Allocate
1236 *
1237 * Input Args:
1238 *   vm - Virtual Machine
1239 *   sz - Size in bytes
1240 *   vaddr_min - Minimum starting virtual address
1241 *   data_memslot - Memory region slot for data pages
1242 *   pgd_memslot - Memory region slot for new virtual translation tables
1243 *
1244 * Output Args: None
1245 *
1246 * Return:
1247 *   Starting guest virtual address
1248 *
1249 * Allocates at least sz bytes within the virtual address space of the vm
1250 * given by vm.  The allocated bytes are mapped to a virtual address >=
1251 * the address given by vaddr_min.  Note that each allocation uses a
1252 * a unique set of pages, with the minimum real allocation being at least
1253 * a page.
1254 */
1255vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
1256{
1257        uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
1258
1259        virt_pgd_alloc(vm);
1260        vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
1261                                              KVM_UTIL_MIN_PFN * vm->page_size, 0);
1262
1263        /*
1264         * Find an unused range of virtual page addresses of at least
1265         * pages in length.
1266         */
1267        vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
1268
1269        /* Map the virtual pages. */
1270        for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
1271                pages--, vaddr += vm->page_size, paddr += vm->page_size) {
1272
1273                virt_pg_map(vm, vaddr, paddr);
1274
1275                sparsebit_set(vm->vpages_mapped,
1276                        vaddr >> vm->page_shift);
1277        }
1278
1279        return vaddr_start;
1280}
1281
1282/*
1283 * VM Virtual Address Allocate Pages
1284 *
1285 * Input Args:
1286 *   vm - Virtual Machine
1287 *
1288 * Output Args: None
1289 *
1290 * Return:
1291 *   Starting guest virtual address
1292 *
1293 * Allocates at least N system pages worth of bytes within the virtual address
1294 * space of the vm.
1295 */
1296vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
1297{
1298        return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
1299}
1300
1301/*
1302 * VM Virtual Address Allocate Page
1303 *
1304 * Input Args:
1305 *   vm - Virtual Machine
1306 *
1307 * Output Args: None
1308 *
1309 * Return:
1310 *   Starting guest virtual address
1311 *
1312 * Allocates at least one system page worth of bytes within the virtual address
1313 * space of the vm.
1314 */
1315vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
1316{
1317        return vm_vaddr_alloc_pages(vm, 1);
1318}
1319
1320/*
1321 * Map a range of VM virtual address to the VM's physical address
1322 *
1323 * Input Args:
1324 *   vm - Virtual Machine
1325 *   vaddr - Virtuall address to map
1326 *   paddr - VM Physical Address
1327 *   npages - The number of pages to map
1328 *   pgd_memslot - Memory region slot for new virtual translation tables
1329 *
1330 * Output Args: None
1331 *
1332 * Return: None
1333 *
1334 * Within the VM given by @vm, creates a virtual translation for
1335 * @npages starting at @vaddr to the page range starting at @paddr.
1336 */
1337void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
1338              unsigned int npages)
1339{
1340        size_t page_size = vm->page_size;
1341        size_t size = npages * page_size;
1342
1343        TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
1344        TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
1345
1346        while (npages--) {
1347                virt_pg_map(vm, vaddr, paddr);
1348                vaddr += page_size;
1349                paddr += page_size;
1350        }
1351}
1352
1353/*
1354 * Address VM Physical to Host Virtual
1355 *
1356 * Input Args:
1357 *   vm - Virtual Machine
1358 *   gpa - VM physical address
1359 *
1360 * Output Args: None
1361 *
1362 * Return:
1363 *   Equivalent host virtual address
1364 *
1365 * Locates the memory region containing the VM physical address given
1366 * by gpa, within the VM given by vm.  When found, the host virtual
1367 * address providing the memory to the vm physical address is returned.
1368 * A TEST_ASSERT failure occurs if no region containing gpa exists.
1369 */
1370void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
1371{
1372        struct userspace_mem_region *region;
1373
1374        region = userspace_mem_region_find(vm, gpa, gpa);
1375        if (!region) {
1376                TEST_FAIL("No vm physical memory at 0x%lx", gpa);
1377                return NULL;
1378        }
1379
1380        return (void *)((uintptr_t)region->host_mem
1381                + (gpa - region->region.guest_phys_addr));
1382}
1383
1384/*
1385 * Address Host Virtual to VM Physical
1386 *
1387 * Input Args:
1388 *   vm - Virtual Machine
1389 *   hva - Host virtual address
1390 *
1391 * Output Args: None
1392 *
1393 * Return:
1394 *   Equivalent VM physical address
1395 *
1396 * Locates the memory region containing the host virtual address given
1397 * by hva, within the VM given by vm.  When found, the equivalent
1398 * VM physical address is returned. A TEST_ASSERT failure occurs if no
1399 * region containing hva exists.
1400 */
1401vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
1402{
1403        struct rb_node *node;
1404
1405        for (node = vm->regions.hva_tree.rb_node; node; ) {
1406                struct userspace_mem_region *region =
1407                        container_of(node, struct userspace_mem_region, hva_node);
1408
1409                if (hva >= region->host_mem) {
1410                        if (hva <= (region->host_mem
1411                                + region->region.memory_size - 1))
1412                                return (vm_paddr_t)((uintptr_t)
1413                                        region->region.guest_phys_addr
1414                                        + (hva - (uintptr_t)region->host_mem));
1415
1416                        node = node->rb_right;
1417                } else
1418                        node = node->rb_left;
1419        }
1420
1421        TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
1422        return -1;
1423}
1424
1425/*
1426 * Address VM physical to Host Virtual *alias*.
1427 *
1428 * Input Args:
1429 *   vm - Virtual Machine
1430 *   gpa - VM physical address
1431 *
1432 * Output Args: None
1433 *
1434 * Return:
1435 *   Equivalent address within the host virtual *alias* area, or NULL
1436 *   (without failing the test) if the guest memory is not shared (so
1437 *   no alias exists).
1438 *
1439 * When vm_create() and related functions are called with a shared memory
1440 * src_type, we also create a writable, shared alias mapping of the
1441 * underlying guest memory. This allows the host to manipulate guest memory
1442 * without mapping that memory in the guest's address space. And, for
1443 * userfaultfd-based demand paging, we can do so without triggering userfaults.
1444 */
1445void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
1446{
1447        struct userspace_mem_region *region;
1448        uintptr_t offset;
1449
1450        region = userspace_mem_region_find(vm, gpa, gpa);
1451        if (!region)
1452                return NULL;
1453
1454        if (!region->host_alias)
1455                return NULL;
1456
1457        offset = gpa - region->region.guest_phys_addr;
1458        return (void *) ((uintptr_t) region->host_alias + offset);
1459}
1460
1461/*
1462 * VM Create IRQ Chip
1463 *
1464 * Input Args:
1465 *   vm - Virtual Machine
1466 *
1467 * Output Args: None
1468 *
1469 * Return: None
1470 *
1471 * Creates an interrupt controller chip for the VM specified by vm.
1472 */
1473void vm_create_irqchip(struct kvm_vm *vm)
1474{
1475        int ret;
1476
1477        ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
1478        TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
1479                "rc: %i errno: %i", ret, errno);
1480
1481        vm->has_irqchip = true;
1482}
1483
1484/*
1485 * VM VCPU State
1486 *
1487 * Input Args:
1488 *   vm - Virtual Machine
1489 *   vcpuid - VCPU ID
1490 *
1491 * Output Args: None
1492 *
1493 * Return:
1494 *   Pointer to structure that describes the state of the VCPU.
1495 *
1496 * Locates and returns a pointer to a structure that describes the
1497 * state of the VCPU with the given vcpuid.
1498 */
1499struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
1500{
1501        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1502        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1503
1504        return vcpu->state;
1505}
1506
/*
 * VM VCPU Run
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: None
 *
 * Runs the VCPU given by vcpuid, within the VM given by vm, through
 * _vcpu_run().  TEST_ASSERT failure if that call does not return 0.
 */
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
{
	int ret;

	ret = _vcpu_run(vm, vcpuid);
	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}
1527
1528int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
1529{
1530        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1531        int rc;
1532
1533        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1534        do {
1535                rc = ioctl(vcpu->fd, KVM_RUN, NULL);
1536        } while (rc == -1 && errno == EINTR);
1537
1538        assert_on_unhandled_exception(vm, vcpuid);
1539
1540        return rc;
1541}
1542
1543int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
1544{
1545        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1546
1547        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1548
1549        return vcpu->fd;
1550}
1551
1552void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
1553{
1554        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1555        int ret;
1556
1557        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1558
1559        vcpu->state->immediate_exit = 1;
1560        ret = ioctl(vcpu->fd, KVM_RUN, NULL);
1561        vcpu->state->immediate_exit = 0;
1562
1563        TEST_ASSERT(ret == -1 && errno == EINTR,
1564                    "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
1565                    ret, errno);
1566}
1567
1568void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
1569                          struct kvm_guest_debug *debug)
1570{
1571        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1572        int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
1573
1574        TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
1575}
1576
1577/*
1578 * VM VCPU Set MP State
1579 *
1580 * Input Args:
1581 *   vm - Virtual Machine
1582 *   vcpuid - VCPU ID
1583 *   mp_state - mp_state to be set
1584 *
1585 * Output Args: None
1586 *
1587 * Return: None
1588 *
1589 * Sets the MP state of the VCPU given by vcpuid, to the state given
1590 * by mp_state.
1591 */
1592void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
1593                       struct kvm_mp_state *mp_state)
1594{
1595        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1596        int ret;
1597
1598        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1599
1600        ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
1601        TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
1602                "rc: %i errno: %i", ret, errno);
1603}
1604
1605/*
1606 * VM VCPU Get Reg List
1607 *
1608 * Input Args:
1609 *   vm - Virtual Machine
1610 *   vcpuid - VCPU ID
1611 *
1612 * Output Args:
1613 *   None
1614 *
1615 * Return:
1616 *   A pointer to an allocated struct kvm_reg_list
1617 *
1618 * Get the list of guest registers which are supported for
1619 * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
1620 */
1621struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
1622{
1623        struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
1624        int ret;
1625
1626        ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
1627        TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
1628        reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
1629        reg_list->n = reg_list_n.n;
1630        vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
1631        return reg_list;
1632}
1633
1634/*
1635 * VM VCPU Regs Get
1636 *
1637 * Input Args:
1638 *   vm - Virtual Machine
1639 *   vcpuid - VCPU ID
1640 *
1641 * Output Args:
1642 *   regs - current state of VCPU regs
1643 *
1644 * Return: None
1645 *
1646 * Obtains the current register state for the VCPU specified by vcpuid
1647 * and stores it at the location given by regs.
1648 */
1649void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1650{
1651        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1652        int ret;
1653
1654        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1655
1656        ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
1657        TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
1658                ret, errno);
1659}
1660
1661/*
1662 * VM VCPU Regs Set
1663 *
1664 * Input Args:
1665 *   vm - Virtual Machine
1666 *   vcpuid - VCPU ID
1667 *   regs - Values to set VCPU regs to
1668 *
1669 * Output Args: None
1670 *
1671 * Return: None
1672 *
1673 * Sets the regs of the VCPU specified by vcpuid to the values
1674 * given by regs.
1675 */
1676void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
1677{
1678        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1679        int ret;
1680
1681        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1682
1683        ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
1684        TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
1685                ret, errno);
1686}
1687
1688#ifdef __KVM_HAVE_VCPU_EVENTS
1689void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
1690                     struct kvm_vcpu_events *events)
1691{
1692        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1693        int ret;
1694
1695        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1696
1697        ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
1698        TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
1699                ret, errno);
1700}
1701
1702void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
1703                     struct kvm_vcpu_events *events)
1704{
1705        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1706        int ret;
1707
1708        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1709
1710        ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
1711        TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
1712                ret, errno);
1713}
1714#endif
1715
1716#ifdef __x86_64__
1717void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
1718                           struct kvm_nested_state *state)
1719{
1720        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1721        int ret;
1722
1723        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1724
1725        ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
1726        TEST_ASSERT(ret == 0,
1727                "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1728                ret, errno);
1729}
1730
1731int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
1732                          struct kvm_nested_state *state, bool ignore_error)
1733{
1734        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1735        int ret;
1736
1737        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1738
1739        ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
1740        if (!ignore_error) {
1741                TEST_ASSERT(ret == 0,
1742                        "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
1743                        ret, errno);
1744        }
1745
1746        return ret;
1747}
1748#endif
1749
1750/*
1751 * VM VCPU System Regs Get
1752 *
1753 * Input Args:
1754 *   vm - Virtual Machine
1755 *   vcpuid - VCPU ID
1756 *
1757 * Output Args:
1758 *   sregs - current state of VCPU system regs
1759 *
1760 * Return: None
1761 *
1762 * Obtains the current system register state for the VCPU specified by
1763 * vcpuid and stores it at the location given by sregs.
1764 */
1765void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1766{
1767        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1768        int ret;
1769
1770        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1771
1772        ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
1773        TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
1774                ret, errno);
1775}
1776
/*
 * VM VCPU System Regs Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   sregs - Values to set VCPU system regs to
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the system regs of the VCPU specified by vcpuid to the values
 * given by sregs, through _vcpu_sregs_set().  TEST_ASSERT failure if
 * that call does not return 0.
 */
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
{
	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);

	/* Report the ioctl that was actually issued, not KVM_RUN. */
	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
		"rc: %i errno: %i", ret, errno);
}
1798
1799int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
1800{
1801        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1802
1803        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1804
1805        return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
1806}
1807
1808void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1809{
1810        int ret;
1811
1812        ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
1813        TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
1814                    ret, errno, strerror(errno));
1815}
1816
1817void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
1818{
1819        int ret;
1820
1821        ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
1822        TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
1823                    ret, errno, strerror(errno));
1824}
1825
1826void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1827{
1828        int ret;
1829
1830        ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
1831        TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
1832                    ret, errno, strerror(errno));
1833}
1834
1835void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
1836{
1837        int ret;
1838
1839        ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
1840        TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
1841                    ret, errno, strerror(errno));
1842}
1843
/*
 * VCPU Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VCPU fd through _vcpu_ioctl().  A
 * TEST_ASSERT failure occurs if the ioctl returns a non-zero value.
 */
void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
                unsigned long cmd, void *arg)
{
        int ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);

        TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
                    cmd, ret, errno, strerror(errno));
}
1866
1867int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
1868                unsigned long cmd, void *arg)
1869{
1870        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
1871        int ret;
1872
1873        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
1874
1875        ret = ioctl(vcpu->fd, cmd, arg);
1876
1877        return ret;
1878}
1879
1880void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
1881{
1882        struct vcpu *vcpu;
1883        uint32_t size = vm->dirty_ring_size;
1884
1885        TEST_ASSERT(size > 0, "Should enable dirty ring first");
1886
1887        vcpu = vcpu_find(vm, vcpuid);
1888
1889        TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
1890
1891        if (!vcpu->dirty_gfns) {
1892                void *addr;
1893
1894                addr = mmap(NULL, size, PROT_READ,
1895                            MAP_PRIVATE, vcpu->fd,
1896                            vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1897                TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
1898
1899                addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
1900                            MAP_PRIVATE, vcpu->fd,
1901                            vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1902                TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
1903
1904                addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
1905                            MAP_SHARED, vcpu->fd,
1906                            vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
1907                TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
1908
1909                vcpu->dirty_gfns = addr;
1910                vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
1911        }
1912
1913        return vcpu->dirty_gfns;
1914}
1915
/*
 * VM Ioctl
 *
 * Input Args:
 *   vm - Virtual Machine
 *   cmd - Ioctl number
 *   arg - Argument to pass to the ioctl
 *
 * Return: None
 *
 * Issues an arbitrary ioctl on a VM fd through _vm_ioctl().  A
 * TEST_ASSERT failure occurs if the ioctl returns a non-zero value.
 */
void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
        int ret = _vm_ioctl(vm, cmd, arg);

        TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
                    cmd, ret, errno, strerror(errno));
}
1936
1937int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1938{
1939        return ioctl(vm->fd, cmd, arg);
1940}
1941
1942/*
1943 * KVM system ioctl
1944 *
1945 * Input Args:
1946 *   vm - Virtual Machine
1947 *   cmd - Ioctl number
1948 *   arg - Argument to pass to the ioctl
1949 *
1950 * Return: None
1951 *
1952 * Issues an arbitrary ioctl on a KVM fd.
1953 */
1954void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1955{
1956        int ret;
1957
1958        ret = ioctl(vm->kvm_fd, cmd, arg);
1959        TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
1960                cmd, ret, errno, strerror(errno));
1961}
1962
1963int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
1964{
1965        return ioctl(vm->kvm_fd, cmd, arg);
1966}
1967
1968/*
1969 * Device Ioctl
1970 */
1971
1972int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
1973{
1974        struct kvm_device_attr attribute = {
1975                .group = group,
1976                .attr = attr,
1977                .flags = 0,
1978        };
1979
1980        return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
1981}
1982
/*
 * Asserting wrapper around _kvm_device_check_attr().  A TEST_ASSERT
 * failure occurs if the ioctl result is negative; otherwise the result
 * is returned to the caller.
 */
int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
        int ret;

        ret = _kvm_device_check_attr(dev_fd, group, attr);
        TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);

        return ret;
}
1990
1991int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd)
1992{
1993        struct kvm_create_device create_dev;
1994        int ret;
1995
1996        create_dev.type = type;
1997        create_dev.fd = -1;
1998        create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
1999        ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev);
2000        *fd = create_dev.fd;
2001        return ret;
2002}
2003
/*
 * Wrapper around _kvm_create_device().
 *
 * In test mode the raw ioctl result is returned without any assertion.
 * Otherwise a TEST_ASSERT failure occurs if the ioctl result is negative,
 * and the fd reported by _kvm_create_device() is returned.
 */
int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
{
        int fd, ret;

        ret = _kvm_create_device(vm, type, test, &fd);
        if (test)
                return ret;

        TEST_ASSERT(ret >= 0,
                    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
        return fd;
}
2017
2018int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
2019                      void *val, bool write)
2020{
2021        struct kvm_device_attr kvmattr = {
2022                .group = group,
2023                .attr = attr,
2024                .flags = 0,
2025                .addr = (uintptr_t)val,
2026        };
2027        int ret;
2028
2029        ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
2030                    &kvmattr);
2031        return ret;
2032}
2033
/*
 * Asserting wrapper around _kvm_device_access().  A TEST_ASSERT failure
 * occurs if the ioctl result is negative; otherwise the result is
 * returned to the caller.
 */
int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
                      void *val, bool write)
{
        int ret;

        ret = _kvm_device_access(dev_fd, group, attr, val, write);
        TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);

        return ret;
}
2042
2043/*
2044 * VM Dump
2045 *
2046 * Input Args:
2047 *   vm - Virtual Machine
2048 *   indent - Left margin indent amount
2049 *
2050 * Output Args:
2051 *   stream - Output FILE stream
2052 *
2053 * Return: None
2054 *
2055 * Dumps the current state of the VM given by vm, to the FILE stream
2056 * given by stream.
2057 */
2058void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
2059{
2060        int ctr;
2061        struct userspace_mem_region *region;
2062        struct vcpu *vcpu;
2063
2064        fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
2065        fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
2066        fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
2067        fprintf(stream, "%*sMem Regions:\n", indent, "");
2068        hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
2069                fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
2070                        "host_virt: %p\n", indent + 2, "",
2071                        (uint64_t) region->region.guest_phys_addr,
2072                        (uint64_t) region->region.memory_size,
2073                        region->host_mem);
2074                fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
2075                sparsebit_dump(stream, region->unused_phy_pages, 0);
2076        }
2077        fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
2078        sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
2079        fprintf(stream, "%*spgd_created: %u\n", indent, "",
2080                vm->pgd_created);
2081        if (vm->pgd_created) {
2082                fprintf(stream, "%*sVirtual Translation Tables:\n",
2083                        indent + 2, "");
2084                virt_dump(stream, vm, indent + 4);
2085        }
2086        fprintf(stream, "%*sVCPUs:\n", indent, "");
2087        list_for_each_entry(vcpu, &vm->vcpus, list)
2088                vcpu_dump(stream, vm, vcpu->id, indent + 2);
2089}
2090
2091/* Known KVM exit reasons */
2092static struct exit_reason {
2093        unsigned int reason;
2094        const char *name;
2095} exit_reasons_known[] = {
2096        {KVM_EXIT_UNKNOWN, "UNKNOWN"},
2097        {KVM_EXIT_EXCEPTION, "EXCEPTION"},
2098        {KVM_EXIT_IO, "IO"},
2099        {KVM_EXIT_HYPERCALL, "HYPERCALL"},
2100        {KVM_EXIT_DEBUG, "DEBUG"},
2101        {KVM_EXIT_HLT, "HLT"},
2102        {KVM_EXIT_MMIO, "MMIO"},
2103        {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
2104        {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
2105        {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
2106        {KVM_EXIT_INTR, "INTR"},
2107        {KVM_EXIT_SET_TPR, "SET_TPR"},
2108        {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
2109        {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
2110        {KVM_EXIT_S390_RESET, "S390_RESET"},
2111        {KVM_EXIT_DCR, "DCR"},
2112        {KVM_EXIT_NMI, "NMI"},
2113        {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
2114        {KVM_EXIT_OSI, "OSI"},
2115        {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
2116        {KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
2117        {KVM_EXIT_X86_RDMSR, "RDMSR"},
2118        {KVM_EXIT_X86_WRMSR, "WRMSR"},
2119        {KVM_EXIT_XEN, "XEN"},
2120#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
2121        {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
2122#endif
2123};
2124
2125/*
2126 * Exit Reason String
2127 *
2128 * Input Args:
2129 *   exit_reason - Exit reason
2130 *
2131 * Output Args: None
2132 *
2133 * Return:
2134 *   Constant string pointer describing the exit reason.
2135 *
2136 * Locates and returns a constant string that describes the KVM exit
2137 * reason given by exit_reason.  If no such string is found, a constant
2138 * string of "Unknown" is returned.
2139 */
2140const char *exit_reason_str(unsigned int exit_reason)
2141{
2142        unsigned int n1;
2143
2144        for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
2145                if (exit_reason == exit_reasons_known[n1].reason)
2146                        return exit_reasons_known[n1].name;
2147        }
2148
2149        return "Unknown";
2150}
2151
2152/*
2153 * Physical Contiguous Page Allocator
2154 *
2155 * Input Args:
2156 *   vm - Virtual Machine
2157 *   num - number of pages
2158 *   paddr_min - Physical address minimum
2159 *   memslot - Memory region to allocate page from
2160 *
2161 * Output Args: None
2162 *
2163 * Return:
2164 *   Starting physical address
2165 *
2166 * Within the VM specified by vm, locates a range of available physical
2167 * pages at or above paddr_min. If found, the pages are marked as in use
2168 * and their base address is returned. A TEST_ASSERT failure occurs if
2169 * not enough pages are available at or above paddr_min.
2170 */
2171vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
2172                              vm_paddr_t paddr_min, uint32_t memslot)
2173{
2174        struct userspace_mem_region *region;
2175        sparsebit_idx_t pg, base;
2176
2177        TEST_ASSERT(num > 0, "Must allocate at least one page");
2178
2179        TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
2180                "not divisible by page size.\n"
2181                "  paddr_min: 0x%lx page_size: 0x%x",
2182                paddr_min, vm->page_size);
2183
2184        region = memslot2region(vm, memslot);
2185        base = pg = paddr_min >> vm->page_shift;
2186
2187        do {
2188                for (; pg < base + num; ++pg) {
2189                        if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
2190                                base = pg = sparsebit_next_set(region->unused_phy_pages, pg);
2191                                break;
2192                        }
2193                }
2194        } while (pg && pg != base + num);
2195
2196        if (pg == 0) {
2197                fprintf(stderr, "No guest physical page available, "
2198                        "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
2199                        paddr_min, vm->page_size, memslot);
2200                fputs("---- vm dump ----\n", stderr);
2201                vm_dump(stderr, vm, 2);
2202                abort();
2203        }
2204
2205        for (pg = base; pg < base + num; ++pg)
2206                sparsebit_clear(region->unused_phy_pages, pg);
2207
2208        return base * vm->page_size;
2209}
2210
2211vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
2212                             uint32_t memslot)
2213{
2214        return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
2215}
2216
2217/* Arbitrary minimum physical address used for virtual translation tables. */
2218#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
2219
2220vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
2221{
2222        return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
2223}
2224
2225/*
2226 * Address Guest Virtual to Host Virtual
2227 *
2228 * Input Args:
2229 *   vm - Virtual Machine
2230 *   gva - VM virtual address
2231 *
2232 * Output Args: None
2233 *
2234 * Return:
2235 *   Equivalent host virtual address
2236 */
2237void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
2238{
2239        return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
2240}
2241
/*
 * Is Unrestricted Guest
 *
 * Input Args:
 *   vm - Virtual Machine
 *
 * Output Args: None
 *
 * Return: True if the first character of the kvm_intel unrestricted_guest
 * module parameter file is 'Y', otherwise false (including when the file
 * cannot be opened).
 *
 * Check if the unrestricted guest flag is enabled.  A TEST_ASSERT failure
 * occurs if the parameter file opens but cannot be read.
 */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
        char val = 'N';
        size_t count;
        FILE *f;

        /* Ensure that the KVM vendor-specific module is loaded. */
        if (vm == NULL)
                close(open_kvm_dev_path_or_exit());

        f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
        if (!f)
                return false;

        count = fread(&val, sizeof(char), 1, f);
        TEST_ASSERT(count == 1, "Unable to read from param file.");
        fclose(f);

        return val == 'Y';
}
2274
2275unsigned int vm_get_page_size(struct kvm_vm *vm)
2276{
2277        return vm->page_size;
2278}
2279
2280unsigned int vm_get_page_shift(struct kvm_vm *vm)
2281{
2282        return vm->page_shift;
2283}
2284
2285uint64_t vm_get_max_gfn(struct kvm_vm *vm)
2286{
2287        return vm->max_gfn;
2288}
2289
2290int vm_get_fd(struct kvm_vm *vm)
2291{
2292        return vm->fd;
2293}
2294
/*
 * Converts a page count between two page sizes.
 *
 * Input Args:
 *   num_pages - Number of pages, each of size (1 << page_shift)
 *   page_shift - log2 of the source page size
 *   new_page_shift - log2 of the destination page size
 *   ceil - When converting to larger pages, round a partial page up
 *
 * Return:
 *   The number of pages of size (1 << new_page_shift) covering the same
 *   amount of memory.  When the destination pages are larger, a trailing
 *   partial page is counted only if ceil is true.
 */
static unsigned int vm_calc_num_pages(unsigned int num_pages,
                                      unsigned int page_shift,
                                      unsigned int new_page_shift,
                                      bool ceil)
{
        unsigned int n;

        /* Same or smaller destination pages: an exact multiplication. */
        if (page_shift >= new_page_shift)
                return num_pages * (1U << (page_shift - new_page_shift));

        /*
         * Compute the ratio only on this branch: with the shifts the other
         * way round the unsigned subtraction wraps, and shifting by that
         * amount is undefined behaviour.
         */
        n = 1U << (new_page_shift - page_shift);

        return num_pages / n + !!(ceil && num_pages % n);
}
2307
/* Returns log2 of the host page size reported by getpagesize(). */
static inline int getpageshift(void)
{
        int host_page_size = getpagesize();

        /* ffs() numbers bits from 1, so subtract one to get the shift. */
        return __builtin_ffs(host_page_size) - 1;
}
2312
2313unsigned int
2314vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
2315{
2316        return vm_calc_num_pages(num_guest_pages,
2317                                 vm_guest_mode_params[mode].page_shift,
2318                                 getpageshift(), true);
2319}
2320
2321unsigned int
2322vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
2323{
2324        return vm_calc_num_pages(num_host_pages, getpageshift(),
2325                                 vm_guest_mode_params[mode].page_shift, false);
2326}
2327
2328unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
2329{
2330        unsigned int n;
2331        n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
2332        return vm_adjust_num_guest_pages(mode, n);
2333}
2334
2335int vm_get_stats_fd(struct kvm_vm *vm)
2336{
2337        return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
2338}
2339
2340int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
2341{
2342        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
2343
2344        return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
2345}
2346