// SPDX-License-Identifier: GPL-2.0-only
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t ignored_06:1;
        uint64_t page_size:1;
        uint64_t ignored_11_08:4;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t ignored_06:1;
        uint64_t page_size:1;
        uint64_t ignored_11_08:4;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t ignored_06:1;
        uint64_t page_size:1;
        uint64_t ignored_11_08:4;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

struct pageTableEntry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t dirty:1;
        uint64_t reserved_07:1;
        uint64_t global:1;
        uint64_t ignored_11_09:3;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

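/*
 * Register Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   regs   - KVM general purpose registers
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the general purpose registers given by @regs, to the
 * FILE stream given by @stream.
 */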
void regs_dump(FILE *stream, struct kvm_regs *regs,
               uint8_t indent)
{
        fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
                "rcx: 0x%.16llx rdx: 0x%.16llx\n",
                indent, "",
                regs->rax, regs->rbx, regs->rcx, regs->rdx);
        fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
                "rsp: 0x%.16llx rbp: 0x%.16llx\n",
                indent, "",
                regs->rsi, regs->rdi, regs->rsp, regs->rbp);
        fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
                "r10: 0x%.16llx r11: 0x%.16llx\n",
                indent, "",
                regs->r8, regs->r9, regs->r10, regs->r11);
        fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
                "r14: 0x%.16llx r15: 0x%.16llx\n",
                indent, "",
                regs->r12, regs->r13, regs->r14, regs->r15);
        fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
                indent, "",
                regs->rip, regs->rflags);
}

/*
 * Segment Dump
 *
 * Input Args:
 *   stream  - Output FILE stream
 *   segment - KVM segment
 *   indent  - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by @segment, to the FILE stream
 * given by @stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
                         uint8_t indent)
{
        fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
                "selector: 0x%.4x type: 0x%.2x\n",
                indent, "", segment->base, segment->limit,
                segment->selector, segment->type);
        fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
                "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
                indent, "", segment->present, segment->dpl,
                segment->db, segment->s, segment->l);
        fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
                "unusable: 0x%.2x padding: 0x%.2x\n",
                indent, "", segment->g, segment->avl,
                segment->unusable, segment->padding);
}

/*
 * dtable Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   dtable - KVM dtable
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
 * given by @stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
                        uint8_t indent)
{
        fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
                "padding: 0x%.4x 0x%.4x 0x%.4x\n",
                indent, "", dtable->base, dtable->limit,
                dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

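/*
 * System Register Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   sregs  - KVM system registers
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the state of the KVM system registers given by @sregs (segments,
 * descriptor tables, control registers, EFER, APIC base and the interrupt
 * bitmap), to the FILE stream given by @stream.
 */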
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
                uint8_t indent)
{
        unsigned int i;

        fprintf(stream, "%*scs:\n", indent, "");
        segment_dump(stream, &sregs->cs, indent + 2);
        fprintf(stream, "%*sds:\n", indent, "");
        segment_dump(stream, &sregs->ds, indent + 2);
        fprintf(stream, "%*ses:\n", indent, "");
        segment_dump(stream, &sregs->es, indent + 2);
        fprintf(stream, "%*sfs:\n", indent, "");
        segment_dump(stream, &sregs->fs, indent + 2);
        fprintf(stream, "%*sgs:\n", indent, "");
        segment_dump(stream, &sregs->gs, indent + 2);
        fprintf(stream, "%*sss:\n", indent, "");
        segment_dump(stream, &sregs->ss, indent + 2);
        fprintf(stream, "%*str:\n", indent, "");
        segment_dump(stream, &sregs->tr, indent + 2);
        fprintf(stream, "%*sldt:\n", indent, "");
        segment_dump(stream, &sregs->ldt, indent + 2);

        fprintf(stream, "%*sgdt:\n", indent, "");
        dtable_dump(stream, &sregs->gdt, indent + 2);
        fprintf(stream, "%*sidt:\n", indent, "");
        dtable_dump(stream, &sregs->idt, indent + 2);

        fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
                "cr3: 0x%.16llx cr4: 0x%.16llx\n",
                indent, "",
                sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
        fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
                "apic_base: 0x%.16llx\n",
                indent, "",
                sregs->cr8, sregs->efer, sregs->apic_base);

        fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
        for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
                fprintf(stream, "%*s%.16llx\n", indent + 2, "",
                        sregs->interrupt_bitmap[i]);
        }
}

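/*
 * Virtual Translation Table Top Level Allocate
 *
 * Input Args:
 *   vm - Virtual Machine
 *   pgd_memslot - Memory region slot for the page table pages
 *
 * Output Args: None
 *
 * Return: None
 *
 * Allocates the page map level 4 table (the x86_64 top-level page
 * table) for @vm, if it has not been created yet.
 */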
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        /* If needed, create page map l4 table. */
        if (!vm->pgd_created) {
                vm_paddr_t paddr = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
                vm->pgd = paddr;
                vm->pgd_created = true;
        }
}

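/*
 * VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM virtual address of page to map
 *   paddr - VM physical address of page to map
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within @vm, creates a 4K page table mapping from the virtual address
 * @vaddr to the physical address @paddr, allocating any intermediate
 * translation tables that are not yet present.
 */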
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
        uint32_t pgd_memslot)
{
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;

        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        TEST_ASSERT((vaddr % vm->page_size) == 0,
                "Virtual address not on page boundary,\n"
                "  vaddr: 0x%lx vm->page_size: 0x%x",
                vaddr, vm->page_size);
        TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
                (vaddr >> vm->page_shift)),
                "Invalid virtual address, vaddr: 0x%lx",
                vaddr);
        TEST_ASSERT((paddr % vm->page_size) == 0,
                "Physical address not on page boundary,\n"
                "  paddr: 0x%lx vm->page_size: 0x%x",
                paddr, vm->page_size);
        TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
                "Physical address beyond maximum supported,\n"
                "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                paddr, vm->max_gfn, vm->page_size);

        index[0] = (vaddr >> 12) & 0x1ffu;
        index[1] = (vaddr >> 21) & 0x1ffu;
        index[2] = (vaddr >> 30) & 0x1ffu;
        index[3] = (vaddr >> 39) & 0x1ffu;

        /* Allocate page directory pointer table if not present. */
        pml4e = addr_gpa2hva(vm, vm->pgd);
        if (!pml4e[index[3]].present) {
                pml4e[index[3]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pml4e[index[3]].writable = true;
                pml4e[index[3]].present = true;
        }

        /* Allocate page directory table if not present. */
        struct pageDirectoryPointerEntry *pdpe;
        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
        if (!pdpe[index[2]].present) {
                pdpe[index[2]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pdpe[index[2]].writable = true;
                pdpe[index[2]].present = true;
        }

        /* Allocate page table if not present. */
        struct pageDirectoryEntry *pde;
        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
        if (!pde[index[1]].present) {
                pde[index[1]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pde[index[1]].writable = true;
                pde[index[1]].present = true;
        }

        /* Fill in page table entry. */
        struct pageTableEntry *pte;
        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
        pte[index[0]].address = paddr >> vm->page_shift;
        pte[index[0]].writable = true;
        pte[index[0]].present = true;
}

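/*
 * Virtual Translation Tables Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   vm     - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps to the FILE stream given by @stream, the contents of all the
 * virtual translation tables for the VM given by @vm.
 */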
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
        struct pageMapL4Entry *pml4e, *pml4e_start;
        struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
        struct pageDirectoryEntry *pde, *pde_start;
        struct pageTableEntry *pte, *pte_start;

        if (!vm->pgd_created)
                return;

        fprintf(stream, "%*s                                          "
                "                no\n", indent, "");
        fprintf(stream, "%*s      index hvaddr         gpaddr         "
                "addr         w exec dirty\n",
                indent, "");
        pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
                vm->pgd);
        for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
                pml4e = &pml4e_start[n1];
                if (!pml4e->present)
                        continue;
                fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
                        " %u\n",
                        indent, "",
                        pml4e - pml4e_start, pml4e,
                        addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
                        pml4e->writable, pml4e->execute_disable);

                pdpe_start = addr_gpa2hva(vm, pml4e->address
                        * vm->page_size);
                for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
                        pdpe = &pdpe_start[n2];
                        if (!pdpe->present)
                                continue;
                        fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
                                "%u  %u\n",
                                indent, "",
                                pdpe - pdpe_start, pdpe,
                                addr_hva2gpa(vm, pdpe),
                                (uint64_t) pdpe->address, pdpe->writable,
                                pdpe->execute_disable);

                        pde_start = addr_gpa2hva(vm,
                                pdpe->address * vm->page_size);
                        for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
                                pde = &pde_start[n3];
                                if (!pde->present)
                                        continue;
                                fprintf(stream, "%*spde   0x%-3zx %p "
                                        "0x%-12lx 0x%-10lx %u  %u\n",
                                        indent, "", pde - pde_start, pde,
                                        addr_hva2gpa(vm, pde),
                                        (uint64_t) pde->address, pde->writable,
                                        pde->execute_disable);

                                pte_start = addr_gpa2hva(vm,
                                        pde->address * vm->page_size);
                                for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
                                        pte = &pte_start[n4];
                                        if (!pte->present)
                                                continue;
                                        fprintf(stream, "%*spte   0x%-3zx %p "
                                                "0x%-12lx 0x%-10lx %u  %u "
                                                "    %u    0x%-10lx\n",
                                                indent, "",
                                                pte - pte_start, pte,
                                                addr_hva2gpa(vm, pte),
                                                (uint64_t) pte->address,
                                                pte->writable,
                                                pte->execute_disable,
                                                pte->dirty,
                                                ((uint64_t) n1 << 27)
                                                        | ((uint64_t) n2 << 18)
                                                        | ((uint64_t) n3 << 9)
                                                        | ((uint64_t) n4));
                                }
                        }
                }
        }
}

/*
 * Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by @segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->unusable = true;
}

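/*
 * Fill GDT Entry (64-bit)
 *
 * Input Args:
 *   vm   - VM whose GDT is being filled
 *   segp - Pointer to KVM segment to copy into the GDT
 *
 * Output Args: None
 *
 * Return: None
 *
 * Writes the segment described by @segp into the GDT slot selected by
 * segp->selector.  The upper base field (base3) only exists in system
 * descriptors, hence the !segp->s check.
 */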
static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
        void *gdt = addr_gva2hva(vm, vm->gdt);
        struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

        desc->limit0 = segp->limit & 0xFFFF;
        desc->base0 = segp->base & 0xFFFF;
        desc->base1 = segp->base >> 16;
        desc->s = segp->s;
        desc->type = segp->type;
        desc->dpl = segp->dpl;
        desc->p = segp->present;
        desc->limit1 = segp->limit >> 16;
        desc->l = segp->l;
        desc->db = segp->db;
        desc->g = segp->g;
        desc->base2 = segp->base >> 24;
        if (!segp->s)
                desc->base3 = segp->base >> 32;
}

/*
 * Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a code segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
        struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->selector = selector;
        segp->limit = 0xFFFFFFFFu;
        segp->s = 0x1; /* kTypeCodeData */
        segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
                                          * | kFlagCodeReadable
                                          */
        segp->g = true;
        segp->l = true;
        segp->present = true;
        if (vm)
                kvm_seg_fill_gdt_64bit(vm, segp);
}

/*
 * Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by @segp, to be a data segment
 * with the selector value given by @selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
        struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->selector = selector;
        segp->limit = 0xFFFFFFFFu;
        segp->s = 0x1; /* kTypeCodeData */
        segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
                                          * | kFlagDataWritable
                                          */
        segp->g = true;
        segp->present = true;
        if (vm)
                kvm_seg_fill_gdt_64bit(vm, segp);
}

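/*
 * Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm  - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Walks the virtual translation tables of @vm to translate the VM
 * virtual address given by @gva to a VM physical address.  A TEST_FAIL
 * is produced if no mapping exists.
 */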
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;
        struct pageDirectoryPointerEntry *pdpe;
        struct pageDirectoryEntry *pde;
        struct pageTableEntry *pte;

        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        index[0] = (gva >> 12) & 0x1ffu;
        index[1] = (gva >> 21) & 0x1ffu;
        index[2] = (gva >> 30) & 0x1ffu;
        index[3] = (gva >> 39) & 0x1ffu;

        if (!vm->pgd_created)
                goto unmapped_gva;
        pml4e = addr_gpa2hva(vm, vm->pgd);
        if (!pml4e[index[3]].present)
                goto unmapped_gva;

        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
        if (!pdpe[index[2]].present)
                goto unmapped_gva;

        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
        if (!pde[index[1]].present)
                goto unmapped_gva;

        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
        if (!pte[index[0]].present)
                goto unmapped_gva;

        return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
        TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
        exit(EXIT_FAILURE);
}

static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
                          int pgd_memslot)
{
        if (!vm->gdt)
                vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
                        KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

        dt->base = vm->gdt;
        dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
                                int selector, int gdt_memslot,
                                int pgd_memslot)
{
        if (!vm->tss)
                vm->tss = vm_vaddr_alloc(vm, getpagesize(),
                        KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

        memset(segp, 0, sizeof(*segp));
        segp->base = vm->tss;
        segp->limit = 0x67;
        segp->selector = selector;
        segp->type = 0xb;
        segp->present = 1;
        kvm_seg_fill_gdt_64bit(vm, segp);
}

static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
        struct kvm_sregs sregs;

        /* Set mode specific system register values. */
        vcpu_sregs_get(vm, vcpuid, &sregs);

        sregs.idt.limit = 0;

        kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

        switch (vm->mode) {
        case VM_MODE_PXXV48_4K:
                sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
                sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
                sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

                kvm_seg_set_unusable(&sregs.ldt);
                kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
                kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
                kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
                kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
                break;

        default:
                TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
        }

        sregs.cr3 = vm->pgd;
        vcpu_sregs_set(vm, vcpuid, &sregs);
}

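/*
 * VM VCPU Add Default
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   guest_code - Guest code entry point
 *
 * Output Args: None
 *
 * Return: None
 *
 * Adds a VCPU with reasonable defaults (a stack, flat 64-bit segments
 * and a runnable MP state) to the VM given by @vm, with the VCPU
 * starting execution at @guest_code.
 */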
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
        vm_vaddr_t stack_vaddr;

        stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
                                     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

        /* Create VCPU */
        vm_vcpu_add(vm, vcpuid);
        vcpu_setup(vm, vcpuid, 0, 0);

        /* Setup guest general purpose registers */
        vcpu_regs_get(vm, vcpuid, &regs);
        regs.rflags = regs.rflags | 0x2;
        regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
        regs.rip = (unsigned long) guest_code;
        vcpu_regs_set(vm, vcpuid, &regs);

        /* Setup the MP state */
        mp_state.mp_state = 0;
        vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/*
 * Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct. The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated. This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
        struct kvm_cpuid2 *cpuid;
        int nent = 100;
        size_t size;

        size = sizeof(*cpuid);
        size += nent * sizeof(struct kvm_cpuid_entry2);
        cpuid = malloc(size);
        if (!cpuid) {
                perror("malloc");
                abort();
        }

        cpuid->nent = nent;

        return cpuid;
}

/*
 * KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
        static struct kvm_cpuid2 *cpuid;
        int ret;
        int kvm_fd;

        if (cpuid)
                return cpuid;

        cpuid = allocate_kvm_cpuid2();
        kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
        if (kvm_fd < 0)
                exit(KSFT_SKIP);

        ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
        TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
                    ret, errno);

        close(kvm_fd);
        return cpuid;
}

/*
 * Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry. Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
        struct kvm_cpuid2 *cpuid;
        struct kvm_cpuid_entry2 *entry = NULL;
        int i;

        cpuid = kvm_get_supported_cpuid();
        for (i = 0; i < cpuid->nent; i++) {
                if (cpuid->entries[i].function == function &&
                    cpuid->entries[i].index == index) {
                        entry = &cpuid->entries[i];
                        break;
                }
        }

        TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
                    function, index);
        return entry;
}

/*
 * VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
                uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        int rc;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

        rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
        TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
                    rc, errno);
}

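/*
 * VM Create Default
 *
 * Input Args:
 *   vcpuid - VCPU ID
 *   extra_mem_pages - Additional guest memory pages beyond the defaults
 *   guest_code - Guest code entry point
 *
 * Output Args: None
 *
 * Return: Pointer to opaque structure that describes the created VM.
 *
 * Creates a VM with reasonable defaults (default mode, an IRQ chip, the
 * test binary loaded as the guest image) and one VCPU that starts at
 * @guest_code.  A typical caller might look like this (a sketch;
 * VCPU_ID is a test-defined constant, not part of this library):
 *
 *   vm = vm_create_default(VCPU_ID, 0, guest_main);
 *   vcpu_run(vm, VCPU_ID);
 */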
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
                                 void *guest_code)
{
        struct kvm_vm *vm;
        /*
         * For x86 the maximum page table size for a memory region
         * will be when only 4K pages are used.  In that case the
         * total extra size for page tables (for extra N pages) will
         * be: N/512+N/512^2+N/512^3+... which is definitely smaller
         * than N/512*2.
         */
        uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

        /* Create VM */
        vm = vm_create(VM_MODE_DEFAULT,
                       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
                       O_RDWR);

        /* Setup guest code */
        kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

        /* Setup IRQ Chip */
        vm_create_irqchip(vm);

        /* Add the first vCPU. */
        vm_vcpu_add_default(vm, vcpuid, guest_code);

        return vm;
}

/*
 * VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct {
                struct kvm_msrs header;
                struct kvm_msr_entry entry;
        } buffer = {};
        int r;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
        buffer.header.nmsrs = 1;
        buffer.entry.index = msr_index;
        r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
        TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
                "  rc: %i errno: %i", r, errno);

        return buffer.entry.data;
}

/*
 * _VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: The result of KVM_SET_MSRS.
 *
 * Sets the value of an MSR for the given VCPU.
 */
int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
                  uint64_t msr_value)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct {
                struct kvm_msrs header;
                struct kvm_msr_entry entry;
        } buffer = {};
        int r;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
        buffer.header.nmsrs = 1;
        buffer.entry.index = msr_index;
        buffer.entry.data = msr_value;
        r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
        return r;
}

/*
 * VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
        uint64_t msr_value)
{
        int r;

        r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
        TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
                "  rc: %i errno: %i", r, errno);
}

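/*
 * VM VCPU Args Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   num - number of arguments
 *   ... - arguments, each of type uint64_t
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the first @num input parameters for the function at the VCPU's
 * entry point, per the x86-64 System V calling convention (rdi, rsi,
 * rdx, rcx, r8, r9).  For example, vcpu_args_set(vm, vcpuid, 2, a, b)
 * places @a in rdi and @b in rsi.
 */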
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
        va_list ap;
        struct kvm_regs regs;

        TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
                    "  num: %u\n",
                    num);

        va_start(ap, num);
        vcpu_regs_get(vm, vcpuid, &regs);

        if (num >= 1)
                regs.rdi = va_arg(ap, uint64_t);

        if (num >= 2)
                regs.rsi = va_arg(ap, uint64_t);

        if (num >= 3)
                regs.rdx = va_arg(ap, uint64_t);

        if (num >= 4)
                regs.rcx = va_arg(ap, uint64_t);

        if (num >= 5)
                regs.r8 = va_arg(ap, uint64_t);

        if (num >= 6)
                regs.r9 = va_arg(ap, uint64_t);

        vcpu_regs_set(vm, vcpuid, &regs);
        va_end(ap);
}

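/*
 * VCPU Dump
 *
 * Input Args:
 *   stream - Output FILE stream
 *   vm     - Virtual Machine
 *   vcpuid - VCPU ID
 *   indent - Left margin indent amount
 *
 * Output Args: None
 *
 * Return: None
 *
 * Dumps the current state of the VCPU specified by @vcpuid, within the
 * VM given by @vm, to the FILE stream given by @stream.
 */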
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
        struct kvm_regs regs;
        struct kvm_sregs sregs;

        fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);

        fprintf(stream, "%*sregs:\n", indent + 2, "");
        vcpu_regs_get(vm, vcpuid, &regs);
        regs_dump(stream, &regs, indent + 4);

        fprintf(stream, "%*ssregs:\n", indent + 2, "");
        vcpu_sregs_get(vm, vcpuid, &sregs);
        sregs_dump(stream, &sregs, indent + 4);
}

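/*
 * Complete saved x86 VCPU state, as collected by vcpu_save_state() and
 * consumed by vcpu_load_state().  The nested state buffer has a fixed
 * worst-case size; vcpu_save_state() asserts that the size KVM reports
 * actually fits.
 */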
struct kvm_x86_state {
        struct kvm_vcpu_events events;
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
        struct kvm_xsave xsave;
        struct kvm_xcrs xcrs;
        struct kvm_sregs sregs;
        struct kvm_debugregs debugregs;
        union {
                struct kvm_nested_state nested;
                char nested_[16384];
        };
        struct kvm_msrs msrs;
};

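/*
 * Returns the number of MSRs KVM can save/restore.  Probing with
 * nmsrs = 0 makes KVM_GET_MSR_INDEX_LIST fail with E2BIG while still
 * filling in the required count.
 */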
static int kvm_get_num_msrs_fd(int kvm_fd)
{
        struct kvm_msr_list nmsrs;
        int r;

        nmsrs.nmsrs = 0;
        r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
        TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
                r);

        return nmsrs.nmsrs;
}

static int kvm_get_num_msrs(struct kvm_vm *vm)
{
        return kvm_get_num_msrs_fd(vm->kvm_fd);
}

struct kvm_msr_list *kvm_get_msr_index_list(void)
{
        struct kvm_msr_list *list;
        int nmsrs, r, kvm_fd;

        kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
        if (kvm_fd < 0)
                exit(KSFT_SKIP);

        nmsrs = kvm_get_num_msrs_fd(kvm_fd);
        list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
        list->nmsrs = nmsrs;
        r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
        close(kvm_fd);

        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
                r);

        return list;
}

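/*
 * VCPU State Save
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *
 * Output Args: None
 *
 * Return: A pointer to the saved state, allocated with malloc().  The
 * caller is responsible for freeing it.
 *
 * Saves the complete architectural state of the VCPU: events, MP state,
 * registers, XSAVE/XCRs, segment registers, nested state, MSRs and
 * debug registers.  Intended to be paired with vcpu_load_state(), e.g.
 * to migrate state to a freshly (re)created VM (a sketch; kvm_vm_restart()
 * and vm_vcpu_add() come from the common kvm_util library, and VCPU_ID
 * is a test-defined constant):
 *
 *   state = vcpu_save_state(vm, VCPU_ID);
 *   kvm_vm_restart(vm, O_RDWR);
 *   vm_vcpu_add(vm, VCPU_ID);
 *   vcpu_load_state(vm, VCPU_ID, state);
 */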
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct kvm_msr_list *list;
        struct kvm_x86_state *state;
        int nmsrs, r, i;
        static int nested_size = -1;

        if (nested_size == -1) {
                nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
                TEST_ASSERT(nested_size <= sizeof(state->nested_),
                            "Nested state size too big, %i > %zi",
                            nested_size, sizeof(state->nested_));
        }

        /*
         * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
         * guest state is consistent only after userspace re-enters the
         * kernel with KVM_RUN.  Complete IO prior to migrating state
         * to a new VM.
         */
        vcpu_run_complete_io(vm, vcpuid);

        nmsrs = kvm_get_num_msrs(vm);
        list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
        list->nmsrs = nmsrs;
        r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
                r);

        state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
        r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
                r);

        if (kvm_check_cap(KVM_CAP_XCRS)) {
                r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
                            r);
        }

        r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
                r);

        if (nested_size) {
                state->nested.size = sizeof(state->nested_);
                r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
                        r);
                TEST_ASSERT(state->nested.size <= nested_size,
                        "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
                        state->nested.size, nested_size);
        } else {
                state->nested.size = 0;
        }

        state->msrs.nmsrs = nmsrs;
        for (i = 0; i < nmsrs; i++)
                state->msrs.entries[i].index = list->indices[i];
        r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
        TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
                r, r == nmsrs ? -1 : list->indices[r]);

        r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
                r);

        free(list);
        return state;
}

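/*
 * VCPU State Load
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   state - State to load, as returned by vcpu_save_state()
 *
 * Output Args: None
 *
 * Return: None
 *
 * Restores the architectural VCPU state previously saved by
 * vcpu_save_state().  Any failing KVM ioctl produces a TEST_ASSERT.
 */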
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        int r;

        r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
                r);

        if (kvm_check_cap(KVM_CAP_XCRS)) {
                r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
                            r);
        }

        r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
        TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
                r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

        r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
                r);

        if (state->nested.size) {
                r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
                        r);
        }
}

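/*
 * Returns true when the host CPU vendor string, read via CPUID leaf 0
 * (vendor id in EBX:EDX:ECX), is "GenuineIntel".  The string literal is
 * compared as three 32-bit chunks rather than byte by byte.
 */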
bool is_intel_cpu(void)
{
        int eax, ebx, ecx, edx;
        const uint32_t *chunk;
        const int leaf = 0;

        __asm__ __volatile__(
                "cpuid"
                : /* output */ "=a"(eax), "=b"(ebx),
                  "=c"(ecx), "=d"(edx)
                : /* input */ "0"(leaf), "2"(0));

        chunk = (const uint32_t *)("GenuineIntel");
        return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}

uint32_t kvm_get_cpuid_max_basic(void)
{
        return kvm_get_supported_cpuid_entry(0)->eax;
}

uint32_t kvm_get_cpuid_max_extended(void)
{
        return kvm_get_supported_cpuid_entry(0x80000000)->eax;
}

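/*
 * Queries the physical and virtual address widths supported by the CPU,
 * per SDM Vol. 3 section 4.1.4: if CPUID leaf 0x80000008 is unavailable,
 * the physical width is 36 bits with PAE (CPUID.1:EDX bit 6) or 32 bits
 * without, and the virtual width is 32 bits; otherwise both widths come
 * from CPUID.0x80000008:EAX.
 */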
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
        struct kvm_cpuid_entry2 *entry;
        bool pae;

        /* SDM 4.1.4 */
        if (kvm_get_cpuid_max_extended() < 0x80000008) {
                pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
                *pa_bits = pae ? 36 : 32;
                *va_bits = 32;
        } else {
                entry = kvm_get_supported_cpuid_entry(0x80000008);
                *pa_bits = entry->eax & 0xff;
                *va_bits = (entry->eax >> 8) & 0xff;
        }
}