linux/tools/testing/selftests/kvm/memslot_perf_test.c
// SPDX-License-Identifier: GPL-2.0
/*
 * A memslot-related performance benchmark.
 *
 * Copyright (C) 2021 Oracle and/or its affiliates.
 *
 * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
 */
#include <pthread.h>
#include <sched.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#include <linux/compiler.h>

#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>

#define VCPU_ID 0

#define MEM_SIZE                ((512U << 20) + 4096)
#define MEM_SIZE_PAGES          (MEM_SIZE / 4096)
#define MEM_GPA         0x10000000UL
#define MEM_AUX_GPA             MEM_GPA
#define MEM_SYNC_GPA            MEM_AUX_GPA
#define MEM_TEST_GPA            (MEM_AUX_GPA + 4096)
#define MEM_TEST_SIZE           (MEM_SIZE - 4096)
static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");

/*
 * 32 MiB is the maximum size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs to have at least one page, up to
 * 8194 slots in use can then be tested (although with slightly
 * limited resolution).
 */
#define MEM_SIZE_MAP            ((32U << 20) + 4096)
#define MEM_SIZE_MAP_PAGES      (MEM_SIZE_MAP / 4096)
#define MEM_TEST_MAP_SIZE       (MEM_SIZE_MAP - 4096)
#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");

/*
 * 128 MiB is the minimum size that fills 32k slots with at least one page
 * in each while at the same time getting 100+ iterations in such a test.
 */
#define MEM_TEST_UNMAP_SIZE             (128U << 20)
#define MEM_TEST_UNMAP_SIZE_PAGES       (MEM_TEST_UNMAP_SIZE / 4096)
/* 2 MiB chunk size like a typical huge page */
#define MEM_TEST_UNMAP_CHUNK_PAGES      (2U << (20 - 12))
static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
              "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
              "invalid unmap test region size");
static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
              (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
              "invalid unmap test region size");

/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * When running this test with 32k memslots (32764, really) each memslot
 * contains 4 pages.
 * The last one additionally contains the remaining 21 pages of memory,
 * for the total size of 25 pages.
 * Hence, the maximum size here is 50 pages.
 */
#define MEM_TEST_MOVE_SIZE_PAGES        (50)
#define MEM_TEST_MOVE_SIZE              (MEM_TEST_MOVE_SIZE_PAGES * 4096)
#define MEM_TEST_MOVE_GPA_DEST          (MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
              "invalid move test region size");

#define MEM_TEST_VAL_1 0x1122334455667788
#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00

struct vm_data {
        struct kvm_vm *vm;
        pthread_t vcpu_thread;
        uint32_t nslots;
        uint64_t npages;
        uint64_t pages_per_slot;
        void **hva_slots;
        bool mmio_ok;
        uint64_t mmio_gpa_min;
        uint64_t mmio_gpa_max;
};

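/*
 * The sync_area lives in the auxiliary page at MEM_SYNC_GPA and is shared
 * between the host (accessed through the slot HVA) and the guest (accessed
 * directly, since the test memory is mapped with GVA == GPA).  The flags
 * implement the start / exit / per-iteration sync handshake between the
 * two sides.
 */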
struct sync_area {
        atomic_bool start_flag;
        atomic_bool exit_flag;
        atomic_bool sync_flag;
        void *move_area_ptr;
};

/*
 * Technically, we also need the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");

static sem_t vcpu_ready;

static bool map_unmap_verify;

static bool verbose;
#define pr_info_v(...)                          \
        do {                                    \
                if (verbose)                    \
                        pr_info(__VA_ARGS__);   \
        } while (0)

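/*
 * The vCPU worker thread: runs the vCPU in a loop, posting vcpu_ready on
 * every UCALL_SYNC from the guest and validating MMIO write exits, which
 * are expected only while the "move active area" test temporarily leaves
 * part of the test area outside of any memslot.
 */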
static void *vcpu_worker(void *data)
{
        struct vm_data *vm = data;
        struct kvm_run *run;
        struct ucall uc;
        uint64_t cmd;

        run = vcpu_state(vm->vm, VCPU_ID);
        while (1) {
                vcpu_run(vm->vm, VCPU_ID);

                if (run->exit_reason == KVM_EXIT_IO) {
                        cmd = get_ucall(vm->vm, VCPU_ID, &uc);
                        if (cmd != UCALL_SYNC)
                                break;

                        sem_post(&vcpu_ready);
                        continue;
                }

                if (run->exit_reason != KVM_EXIT_MMIO)
                        break;

                TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
                TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
                TEST_ASSERT(run->mmio.len == 8,
                            "Unexpected exit mmio size = %u", run->mmio.len);
                TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
                            run->mmio.phys_addr <= vm->mmio_gpa_max,
                            "Unexpected exit mmio address = 0x%llx",
                            run->mmio.phys_addr);
        }

        if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
                TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
                          __FILE__, uc.args[1], uc.args[2]);

        return NULL;
}

static void wait_for_vcpu(void)
{
        struct timespec ts;

        TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
                    "clock_gettime() failed: %d\n", errno);

        ts.tv_sec += 2;
        TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
                    "sem_timedwait() failed: %d\n", errno);
}

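/*
 * Translate a guest physical address within the test memory into the
 * corresponding host virtual address, based on the fixed slot layout
 * (every slot holds pages_per_slot pages, the last one also absorbs the
 * remainder).  If rempages is non-NULL the gpa must be page aligned and
 * the number of pages remaining in its slot is returned there.
 */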
static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
{
        uint64_t gpage, pgoffs;
        uint32_t slot, slotoffs;
        void *base;

        TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
        TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
                    "Too high gpa to translate");
        gpa -= MEM_GPA;

        gpage = gpa / 4096;
        pgoffs = gpa % 4096;
        slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
        slotoffs = gpage - (slot * data->pages_per_slot);

        if (rempages) {
                uint64_t slotpages;

                if (slot == data->nslots - 1)
                        slotpages = data->npages - slot * data->pages_per_slot;
                else
                        slotpages = data->pages_per_slot;

                TEST_ASSERT(!pgoffs,
                            "Asking for remaining pages in slot but gpa not page aligned");
                *rempages = slotpages - slotoffs;
        }

        base = data->hva_slots[slot];
        return (uint8_t *)base + slotoffs * 4096 + pgoffs;
}

static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
{
        TEST_ASSERT(slot < data->nslots, "Too high slot number");

        return MEM_GPA + slot * data->pages_per_slot * 4096;
}

static struct vm_data *alloc_vm(void)
{
        struct vm_data *data;

        data = malloc(sizeof(*data));
        TEST_ASSERT(data, "malloc(vmdata) failed");

        data->vm = NULL;
        data->hva_slots = NULL;

        return data;
}

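/*
 * Create the test VM and split mempages of backing memory into
 * max_mem_slots - 1 equally sized memslots (slots 1..max_mem_slots - 1;
 * slot 0 is left to the selftests framework for guest code and page tables),
 * with the last slot also receiving the remainder pages.
 * Returns false and sets *maxslots to the maximum usable cap if the
 * requested slot count cannot be satisfied with at least one page per slot.
 */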
static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
                       void *guest_code, uint64_t mempages,
                       struct timespec *slot_runtime)
{
        uint32_t max_mem_slots;
        uint64_t rempages;
        uint64_t guest_addr;
        uint32_t slot;
        struct timespec tstart;
        struct sync_area *sync;

        max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
        TEST_ASSERT(max_mem_slots > 1,
                    "KVM_CAP_NR_MEMSLOTS should be greater than 1");
        TEST_ASSERT(nslots > 1 || nslots == -1,
                    "Slot count cap should be greater than 1");
        if (nslots != -1)
                max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
        pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);

        TEST_ASSERT(mempages > 1,
                    "Can't test without any memory");

        data->npages = mempages;
        data->nslots = max_mem_slots - 1;
        data->pages_per_slot = mempages / data->nslots;
        if (!data->pages_per_slot) {
                *maxslots = mempages + 1;
                return false;
        }

        rempages = mempages % data->nslots;
        data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
        TEST_ASSERT(data->hva_slots, "malloc() fail");

        data->vm = vm_create_default(VCPU_ID, mempages, guest_code);

        pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
                max_mem_slots - 1, data->pages_per_slot, rempages);

        clock_gettime(CLOCK_MONOTONIC, &tstart);
        for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
                uint64_t npages;

                npages = data->pages_per_slot;
                if (slot == max_mem_slots - 1)
                        npages += rempages;

                vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
                                            guest_addr, slot, npages,
                                            0);
                guest_addr += npages * 4096;
        }
        *slot_runtime = timespec_elapsed(tstart);

        for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
                uint64_t npages;
                uint64_t gpa;

                npages = data->pages_per_slot;
                if (slot == max_mem_slots - 2)
                        npages += rempages;

                gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
                                         slot + 1);
                TEST_ASSERT(gpa == guest_addr,
                            "vm_phy_pages_alloc() failed\n");

                data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
                memset(data->hva_slots[slot], 0, npages * 4096);

                guest_addr += npages * 4096;
        }

        virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);

        sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
        atomic_init(&sync->start_flag, false);
        atomic_init(&sync->exit_flag, false);
        atomic_init(&sync->sync_flag, false);

        data->mmio_ok = false;

        return true;
}

static void launch_vm(struct vm_data *data)
{
        pr_info_v("Launching the test VM\n");

        pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);

        /* Ensure the guest thread is spun up. */
        wait_for_vcpu();
}

static void free_vm(struct vm_data *data)
{
        kvm_vm_free(data->vm);
        free(data->hva_slots);
        free(data);
}

static void wait_guest_exit(struct vm_data *data)
{
        pthread_join(data->vcpu_thread, NULL);
}

static void let_guest_run(struct sync_area *sync)
{
        atomic_store_explicit(&sync->start_flag, true, memory_order_release);
}

static void guest_spin_until_start(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
                ;
}

static void make_guest_exit(struct sync_area *sync)
{
        atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
}

static bool _guest_should_exit(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
}

#define guest_should_exit() unlikely(_guest_should_exit())

/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
        alarm(2);

        atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
        while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
                ;

        alarm(0);
}

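/*
 * The guest side of the handshake: spin until the host raises sync_flag,
 * then consume it with a compare-and-exchange.  Returns false (without
 * syncing) if the host has requested the guest to exit.
 */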
static bool guest_perform_sync(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
        bool expected;

        do {
                if (guest_should_exit())
                        return false;

                expected = true;
        } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
                                                        &expected, false,
                                                        memory_order_acq_rel,
                                                        memory_order_relaxed));

        return true;
}

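/*
 * Guest code for the memslot move tests: keep writing to the test area
 * pointed at by sync->move_area_ptr.  In the "active" variant, writes to
 * GPAs that are temporarily not covered by any memslot (while the last
 * memslot is moved away) trigger MMIO exits validated by vcpu_worker().
 */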
static void guest_code_test_memslot_move(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
        uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);

        GUEST_SYNC(0);

        guest_spin_until_start();

        while (!guest_should_exit()) {
                uintptr_t ptr;

                for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
                     ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_1;

                /*
                 * No host sync here since the MMIO exits are so expensive
                 * that the host would spend most of its time waiting for
                 * the guest; instead of measuring memslot move performance
                 * we would then be measuring the performance and likelihood
                 * of MMIO exits.
                 */
        }

        GUEST_DONE();
}

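/*
 * Guest code for the map test: alternately write MEM_TEST_VAL_1 to every
 * page in the first half of the map test area and MEM_TEST_VAL_2 to every
 * page in the second half, syncing with the host after each half so the
 * host can unmap (and optionally verify) the half not currently being
 * written to.
 */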
static void guest_code_test_memslot_map(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        GUEST_SYNC(0);

        guest_spin_until_start();

        while (1) {
                uintptr_t ptr;

                for (ptr = MEM_TEST_GPA;
                     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_1;

                if (!guest_perform_sync())
                        break;

                for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
                     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_2;

                if (!guest_perform_sync())
                        break;
        }

        GUEST_DONE();
}

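/*
 * Guest code for the unmap tests: touch only one page in each half of the
 * unmap test area per sync (see the comment below for why), leaving the
 * bulk unmapping work to the host.
 */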
static void guest_code_test_memslot_unmap(void)
{
        struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;

        GUEST_SYNC(0);

        guest_spin_until_start();

        while (1) {
                uintptr_t ptr = MEM_TEST_GPA;

                /*
                 * We can afford to access (map) just a small number of pages
                 * per host sync, as otherwise the host would spend
                 * a significant amount of its time waiting for the guest
                 * (instead of doing unmap operations), which would
                 * effectively turn this test into a map performance test.
                 *
                 * Just access a single page to be on the safe side.
                 */
                *(uint64_t *)ptr = MEM_TEST_VAL_1;

                if (!guest_perform_sync())
                        break;

                ptr += MEM_TEST_UNMAP_SIZE / 2;
                *(uint64_t *)ptr = MEM_TEST_VAL_2;

                if (!guest_perform_sync())
                        break;
        }

        GUEST_DONE();
}

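/*
 * Guest code for the RW test: write MEM_TEST_VAL_1 to the first word of
 * every page in the test area, sync, then verify and clear the
 * MEM_TEST_VAL_2 values the host has written to the middle of every page
 * (see test_memslot_rw_loop() for the host side).
 */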
static void guest_code_test_memslot_rw(void)
{
        GUEST_SYNC(0);

        guest_spin_until_start();

        while (1) {
                uintptr_t ptr;

                for (ptr = MEM_TEST_GPA;
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
                        *(uint64_t *)ptr = MEM_TEST_VAL_1;

                if (!guest_perform_sync())
                        break;

                for (ptr = MEM_TEST_GPA + 4096 / 2;
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
                        uint64_t val = *(uint64_t *)ptr;

                        GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
                        *(uint64_t *)ptr = 0;
                }

                if (!guest_perform_sync())
                        break;
        }

        GUEST_DONE();
}

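/*
 * Position the move test area so that (in the "active" variant) its second
 * half lies in the last memslot, the one being moved, and enable MMIO exit
 * validation for that range.  In the "inactive" variant the test area lies
 * entirely below the moved memslot, so no MMIO exits are expected.
 */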
static bool test_memslot_move_prepare(struct vm_data *data,
                                      struct sync_area *sync,
                                      uint64_t *maxslots, bool isactive)
{
        uint64_t movesrcgpa, movetestgpa;

        movesrcgpa = vm_slot2gpa(data, data->nslots - 1);

        if (isactive) {
                uint64_t lastpages;

                vm_gpa2hva(data, movesrcgpa, &lastpages);
                if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
                        *maxslots = 0;
                        return false;
                }
        }

        movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
        sync->move_area_ptr = (void *)movetestgpa;

        if (isactive) {
                data->mmio_ok = true;
                data->mmio_gpa_min = movesrcgpa;
                data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
        }

        return true;
}

static bool test_memslot_move_prepare_active(struct vm_data *data,
                                             struct sync_area *sync,
                                             uint64_t *maxslots)
{
        return test_memslot_move_prepare(data, sync, maxslots, true);
}

static bool test_memslot_move_prepare_inactive(struct vm_data *data,
                                               struct sync_area *sync,
                                               uint64_t *maxslots)
{
        return test_memslot_move_prepare(data, sync, maxslots, false);
}

static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
        uint64_t movesrcgpa;

        movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
        vm_mem_region_move(data->vm, data->nslots - 1 + 1,
                           MEM_TEST_MOVE_GPA_DEST);
        vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
}

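/*
 * Drop the host-side mappings of "count" test pages starting "offsp" pages
 * into the test area, walking the backing memslots and calling
 * madvise(MADV_DONTNEED) on each covered HVA range.
 */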
static void test_memslot_do_unmap(struct vm_data *data,
                                  uint64_t offsp, uint64_t count)
{
        uint64_t gpa, ctr;

        for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
                uint64_t npages;
                void *hva;
                int ret;

                hva = vm_gpa2hva(data, gpa, &npages);
                TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
                npages = min(npages, count - ctr);
                ret = madvise(hva, npages * 4096, MADV_DONTNEED);
                TEST_ASSERT(!ret,
                            "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
                            hva, gpa);
                ctr += npages;
                gpa += npages * 4096;
        }
        TEST_ASSERT(ctr == count,
                    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
}

static void test_memslot_map_unmap_check(struct vm_data *data,
                                         uint64_t offsp, uint64_t valexp)
{
        uint64_t gpa;
        uint64_t *val;

        if (!map_unmap_verify)
                return;

        gpa = MEM_TEST_GPA + offsp * 4096;
        val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
        TEST_ASSERT(*val == valexp,
                    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
                    *val, valexp, gpa);
        *val = 0;
}

static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
        /*
         * Unmap the second half of the test area while guest writes to (maps)
         * the first half.
         */
        test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
                              MEM_TEST_MAP_SIZE_PAGES / 2);

        /*
         * Wait for the guest to finish writing the first half of the test
         * area, verify the written value on the first and the last page of
         * this area and then unmap it.
         * Meanwhile, the guest is writing to (mapping) the second half of
         * the test area.
         */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
        test_memslot_map_unmap_check(data,
                                     MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
                                     MEM_TEST_VAL_1);
        test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);

        /*
         * Wait for the guest to finish writing the second half of the test
         * area and verify the written value on the first and the last page
         * of this area.
         * The area will be unmapped at the beginning of the next loop
         * iteration.
         * Meanwhile, the guest is writing to (mapping) the first half of
         * the test area.
         */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
                                     MEM_TEST_VAL_2);
        test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
                                     MEM_TEST_VAL_2);
}

static void test_memslot_unmap_loop_common(struct vm_data *data,
                                           struct sync_area *sync,
                                           uint64_t chunk)
{
        uint64_t ctr;

        /*
         * Wait for the guest to finish mapping page(s) in the first half
         * of the test area, verify the written value and then perform unmap
         * of this area.
         * Meanwhile, the guest is writing to (mapping) page(s) in the second
         * half of the test area.
         */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
        for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
                test_memslot_do_unmap(data, ctr, chunk);

        /* Likewise, but for the opposite host / guest areas */
        host_perform_sync(sync);
        test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
                                     MEM_TEST_VAL_2);
        for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
             ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
                test_memslot_do_unmap(data, ctr, chunk);
}

static void test_memslot_unmap_loop(struct vm_data *data,
                                    struct sync_area *sync)
{
        test_memslot_unmap_loop_common(data, sync, 1);
}

static void test_memslot_unmap_loop_chunked(struct vm_data *data,
                                            struct sync_area *sync)
{
        test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
}

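/*
 * Host side of the RW test: write MEM_TEST_VAL_2 to the middle of every
 * page in the test area, sync with the guest, then verify and clear the
 * MEM_TEST_VAL_1 values the guest has written to the first word of every
 * page.
 */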
static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
        uint64_t gptr;

        for (gptr = MEM_TEST_GPA + 4096 / 2;
             gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
                *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;

        host_perform_sync(sync);

        for (gptr = MEM_TEST_GPA;
             gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
                uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
                uint64_t val = *vptr;

                TEST_ASSERT(val == MEM_TEST_VAL_1,
                            "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
                            val, gptr);
                *vptr = 0;
        }

        host_perform_sync(sync);
}

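/*
 * Per-test descriptor: "mem_size" is the test memory size in pages
 * (0 selects the default MEM_SIZE_PAGES), "prepare" is an optional hook run
 * before the VM is launched and "loop" is the per-iteration host-side body.
 */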
struct test_data {
        const char *name;
        uint64_t mem_size;
        void (*guest_code)(void);
        bool (*prepare)(struct vm_data *data, struct sync_area *sync,
                        uint64_t *maxslots);
        void (*loop)(struct vm_data *data, struct sync_area *sync);
};

static bool test_execute(int nslots, uint64_t *maxslots,
                         unsigned int maxtime,
                         const struct test_data *tdata,
                         uint64_t *nloops,
                         struct timespec *slot_runtime,
                         struct timespec *guest_runtime)
{
        uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
        struct vm_data *data;
        struct sync_area *sync;
        struct timespec tstart;
        bool ret = true;

        data = alloc_vm();
        if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
                        mem_size, slot_runtime)) {
                ret = false;
                goto exit_free;
        }

        sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);

        if (tdata->prepare &&
            !tdata->prepare(data, sync, maxslots)) {
                ret = false;
                goto exit_free;
        }

        launch_vm(data);

        clock_gettime(CLOCK_MONOTONIC, &tstart);
        let_guest_run(sync);

        while (1) {
                *guest_runtime = timespec_elapsed(tstart);
                if (guest_runtime->tv_sec >= maxtime)
                        break;

                tdata->loop(data, sync);

                (*nloops)++;
        }

        make_guest_exit(sync);
        wait_guest_exit(data);

exit_free:
        free_vm(data);

        return ret;
}

static const struct test_data tests[] = {
        {
                .name = "map",
                .mem_size = MEM_SIZE_MAP_PAGES,
                .guest_code = guest_code_test_memslot_map,
                .loop = test_memslot_map_loop,
        },
        {
                .name = "unmap",
                .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
                .guest_code = guest_code_test_memslot_unmap,
                .loop = test_memslot_unmap_loop,
        },
        {
                .name = "unmap chunked",
                .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
                .guest_code = guest_code_test_memslot_unmap,
                .loop = test_memslot_unmap_loop_chunked,
        },
        {
                .name = "move active area",
                .guest_code = guest_code_test_memslot_move,
                .prepare = test_memslot_move_prepare_active,
                .loop = test_memslot_move_loop,
        },
        {
                .name = "move inactive area",
                .guest_code = guest_code_test_memslot_move,
                .prepare = test_memslot_move_prepare_inactive,
                .loop = test_memslot_move_loop,
        },
        {
                .name = "RW",
                .guest_code = guest_code_test_memslot_rw,
                .loop = test_memslot_rw_loop
        },
};

#define NTESTS ARRAY_SIZE(tests)

struct test_args {
        int tfirst;
        int tlast;
        int nslots;
        int seconds;
        int runs;
};

static void help(char *name, struct test_args *targs)
{
        int ctr;

        pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
                name);
        pr_info(" -h: print this help screen.\n");
        pr_info(" -v: enable verbose mode (not for benchmarking).\n");
        pr_info(" -d: enable extra debug checks.\n");
        pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
                targs->nslots);
        pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
                targs->tfirst, NTESTS - 1);
        pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
                targs->tlast, NTESTS - 1);
        pr_info(" -l: specify the test length in seconds (currently: %i)\n",
                targs->seconds);
        pr_info(" -r: specify the number of runs per test (currently: %i)\n",
                targs->runs);

        pr_info("\nAvailable tests:\n");
        for (ctr = 0; ctr < NTESTS; ctr++)
                pr_info("%d: %s\n", ctr, tests[ctr].name);
}

static bool parse_args(int argc, char *argv[],
                       struct test_args *targs)
{
        int opt;

        while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
                switch (opt) {
                case 'h':
                default:
                        help(argv[0], targs);
                        return false;
                case 'v':
                        verbose = true;
                        break;
                case 'd':
                        map_unmap_verify = true;
                        break;
                case 's':
                        targs->nslots = atoi(optarg);
                        if (targs->nslots <= 0 && targs->nslots != -1) {
                                pr_info("Slot count cap has to be positive or -1 for no cap\n");
                                return false;
                        }
                        break;
                case 'f':
                        targs->tfirst = atoi(optarg);
                        if (targs->tfirst < 0) {
                                pr_info("First test to run has to be non-negative\n");
                                return false;
                        }
                        break;
                case 'e':
                        targs->tlast = atoi(optarg);
                        if (targs->tlast < 0 || targs->tlast >= NTESTS) {
                                pr_info("Last test to run has to be non-negative and less than %zu\n",
                                        NTESTS);
                                return false;
                        }
                        break;
                case 'l':
                        targs->seconds = atoi(optarg);
                        if (targs->seconds < 0) {
                                pr_info("Test length in seconds has to be non-negative\n");
                                return false;
                        }
                        break;
                case 'r':
                        targs->runs = atoi(optarg);
                        if (targs->runs <= 0) {
                                pr_info("Runs per test has to be positive\n");
                                return false;
                        }
                        break;
                }
        }

        if (optind < argc) {
                help(argv[0], targs);
                return false;
        }

        if (targs->tfirst > targs->tlast) {
                pr_info("First test to run cannot be greater than the last test to run\n");
                return false;
        }

        return true;
}

struct test_result {
        struct timespec slot_runtime, guest_runtime, iter_runtime;
        int64_t slottimens, runtimens;
        uint64_t nloops;
};

static bool test_loop(const struct test_data *data,
                      const struct test_args *targs,
                      struct test_result *rbestslottime,
                      struct test_result *rbestruntime)
{
        uint64_t maxslots;
        struct test_result result;

        result.nloops = 0;
        if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
                          &result.nloops,
                          &result.slot_runtime, &result.guest_runtime)) {
                if (maxslots)
                        pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
                                maxslots);
                else
                        pr_info("Memslot count may be too high for this test, try adjusting the cap\n");

                return false;
        }

        pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
                result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
                result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
        if (!result.nloops) {
                pr_info("No full loops done - too short test time or system too loaded?\n");
                return true;
        }

        result.iter_runtime = timespec_div(result.guest_runtime,
                                           result.nloops);
        pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
                result.nloops,
                result.iter_runtime.tv_sec,
                result.iter_runtime.tv_nsec);
        result.slottimens = timespec_to_ns(result.slot_runtime);
        result.runtimens = timespec_to_ns(result.iter_runtime);

        /*
         * Only rank the slot setup time for tests using the whole test memory
         * area so they are comparable
         */
        if (!data->mem_size &&
            (!rbestslottime->slottimens ||
             result.slottimens < rbestslottime->slottimens))
                *rbestslottime = result;
        if (!rbestruntime->runtimens ||
            result.runtimens < rbestruntime->runtimens)
                *rbestruntime = result;

        return true;
}

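/*
 * Example invocation (illustrative only):
 *   ./memslot_perf_test -v -s 1000 -f 0 -e 2 -l 10 -r 5
 * would cap the memslot count at 1000 and run tests 0..2 (the map and both
 * unmap tests) verbosely, with five 10-second runs each.
 */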
int main(int argc, char *argv[])
{
        struct test_args targs = {
                .tfirst = 0,
                .tlast = NTESTS - 1,
                .nslots = -1,
                .seconds = 5,
                .runs = 1,
        };
        struct test_result rbestslottime;
        int tctr;

        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);

        if (!parse_args(argc, argv, &targs))
                return -1;

        rbestslottime.slottimens = 0;
        for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
                const struct test_data *data = &tests[tctr];
                unsigned int runctr;
                struct test_result rbestruntime;

                if (tctr > targs.tfirst)
                        pr_info("\n");

                pr_info("Testing %s performance with %i runs, %d seconds each\n",
                        data->name, targs.runs, targs.seconds);

                rbestruntime.runtimens = 0;
                for (runctr = 0; runctr < targs.runs; runctr++)
                        if (!test_loop(data, &targs,
                                       &rbestslottime, &rbestruntime))
                                break;

                if (rbestruntime.runtimens)
                        pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
                                rbestruntime.iter_runtime.tv_sec,
                                rbestruntime.iter_runtime.tv_nsec,
                                rbestruntime.nloops);
        }

        if (rbestslottime.slottimens)
                pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
                        rbestslottime.slot_runtime.tv_sec,
                        rbestslottime.slot_runtime.tv_nsec);

        return 0;
}