linux/tools/testing/selftests/kvm/lib/x86_64/processor.c
/*
 * tools/testing/selftests/kvm/lib/x86_64/processor.c
 *
 * Copyright (C) 2018, Google LLC.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#define _GNU_SOURCE /* for program_invocation_name */

#include "test_util.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"

/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000

/* Virtual translation table structure declarations */
struct pageMapL4Entry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t ignored_06:1;
        uint64_t page_size:1;
        uint64_t ignored_11_08:4;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

struct pageDirectoryPointerEntry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t ignored_06:1;
        uint64_t page_size:1;
        uint64_t ignored_11_08:4;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

struct pageDirectoryEntry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t ignored_06:1;
        uint64_t page_size:1;
        uint64_t ignored_11_08:4;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

struct pageTableEntry {
        uint64_t present:1;
        uint64_t writable:1;
        uint64_t user:1;
        uint64_t write_through:1;
        uint64_t cache_disable:1;
        uint64_t accessed:1;
        uint64_t dirty:1;
        uint64_t reserved_07:1;
        uint64_t global:1;
        uint64_t ignored_11_09:3;
        uint64_t address:40;
        uint64_t ignored_62_52:11;
        uint64_t execute_disable:1;
};

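/*
 * Each paging-structure entry above must occupy exactly 64 bits; a
 * minimal compile-time check of the bitfield layouts (an illustrative
 * addition, assuming a compiler with C11 _Static_assert):
 */
_Static_assert(sizeof(struct pageMapL4Entry) == 8, "PML4E must be 8 bytes");
_Static_assert(sizeof(struct pageDirectoryPointerEntry) == 8, "PDPE must be 8 bytes");
_Static_assert(sizeof(struct pageDirectoryEntry) == 8, "PDE must be 8 bytes");
_Static_assert(sizeof(struct pageTableEntry) == 8, "PTE must be 8 bytes");
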
/* Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   regs - Register state to dump
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the registers given by regs, to the FILE stream
 * given by stream.
 */
void regs_dump(FILE *stream, struct kvm_regs *regs,
               uint8_t indent)
{
        fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
                "rcx: 0x%.16llx rdx: 0x%.16llx\n",
                indent, "",
                regs->rax, regs->rbx, regs->rcx, regs->rdx);
        fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
                "rsp: 0x%.16llx rbp: 0x%.16llx\n",
                indent, "",
                regs->rsi, regs->rdi, regs->rsp, regs->rbp);
        fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
                "r10: 0x%.16llx r11: 0x%.16llx\n",
                indent, "",
                regs->r8, regs->r9, regs->r10, regs->r11);
        fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
                "r14: 0x%.16llx r15: 0x%.16llx\n",
                indent, "",
                regs->r12, regs->r13, regs->r14, regs->r15);
        fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
                indent, "",
                regs->rip, regs->rflags);
}

/* Segment Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   segment - KVM segment
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM segment given by segment, to the FILE stream
 * given by stream.
 */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
                         uint8_t indent)
{
        fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
                "selector: 0x%.4x type: 0x%.2x\n",
                indent, "", segment->base, segment->limit,
                segment->selector, segment->type);
        fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
                "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
                indent, "", segment->present, segment->dpl,
                segment->db, segment->s, segment->l);
        fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
                "unusable: 0x%.2x padding: 0x%.2x\n",
                indent, "", segment->g, segment->avl,
                segment->unusable, segment->padding);
}

/* dtable Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   dtable - KVM dtable
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the KVM dtable given by dtable, to the FILE stream
 * given by stream.
 */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
                        uint8_t indent)
{
        fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
                "padding: 0x%.4x 0x%.4x 0x%.4x\n",
                indent, "", dtable->base, dtable->limit,
                dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}

/* System Register Dump
 *
 * Input Args:
 *   indent - Left margin indent amount
 *   sregs - System registers
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the state of the system registers given by sregs, to the FILE stream
 * given by stream.
 */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
                uint8_t indent)
{
        unsigned int i;

        fprintf(stream, "%*scs:\n", indent, "");
        segment_dump(stream, &sregs->cs, indent + 2);
        fprintf(stream, "%*sds:\n", indent, "");
        segment_dump(stream, &sregs->ds, indent + 2);
        fprintf(stream, "%*ses:\n", indent, "");
        segment_dump(stream, &sregs->es, indent + 2);
        fprintf(stream, "%*sfs:\n", indent, "");
        segment_dump(stream, &sregs->fs, indent + 2);
        fprintf(stream, "%*sgs:\n", indent, "");
        segment_dump(stream, &sregs->gs, indent + 2);
        fprintf(stream, "%*sss:\n", indent, "");
        segment_dump(stream, &sregs->ss, indent + 2);
        fprintf(stream, "%*str:\n", indent, "");
        segment_dump(stream, &sregs->tr, indent + 2);
        fprintf(stream, "%*sldt:\n", indent, "");
        segment_dump(stream, &sregs->ldt, indent + 2);

        fprintf(stream, "%*sgdt:\n", indent, "");
        dtable_dump(stream, &sregs->gdt, indent + 2);
        fprintf(stream, "%*sidt:\n", indent, "");
        dtable_dump(stream, &sregs->idt, indent + 2);

        fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
                "cr3: 0x%.16llx cr4: 0x%.16llx\n",
                indent, "",
                sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
        fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
                "apic_base: 0x%.16llx\n",
                indent, "",
                sregs->cr8, sregs->efer, sregs->apic_base);

        fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
        for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
                fprintf(stream, "%*s%.16llx\n", indent + 2, "",
                        sregs->interrupt_bitmap[i]);
        }
}

void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
{
        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        /* If needed, create page map l4 table. */
        if (!vm->pgd_created) {
                vm_paddr_t paddr = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
                vm->pgd = paddr;
                vm->pgd_created = true;
        }
}

/* VM Virtual Page Map
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vaddr - VM Virtual Address
 *   paddr - VM Physical Address
 *   pgd_memslot - Memory region slot for new virtual translation tables
 *
 * Output Args: None
 *
 * Return: None
 *
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at vaddr to the page starting at paddr.
 */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
        uint32_t pgd_memslot)
{
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;

        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        TEST_ASSERT((vaddr % vm->page_size) == 0,
                "Virtual address not on page boundary,\n"
                "  vaddr: 0x%lx vm->page_size: 0x%x",
                vaddr, vm->page_size);
        TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
                (vaddr >> vm->page_shift)),
                "Invalid virtual address, vaddr: 0x%lx",
                vaddr);
        TEST_ASSERT((paddr % vm->page_size) == 0,
                "Physical address not on page boundary,\n"
                "  paddr: 0x%lx vm->page_size: 0x%x",
                paddr, vm->page_size);
        TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
                "Physical address beyond maximum supported,\n"
                "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                paddr, vm->max_gfn, vm->page_size);

        index[0] = (vaddr >> 12) & 0x1ffu;
        index[1] = (vaddr >> 21) & 0x1ffu;
        index[2] = (vaddr >> 30) & 0x1ffu;
        index[3] = (vaddr >> 39) & 0x1ffu;
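
        /*
         * Worked example (illustrative): for vaddr 0x40200000, the
         * four indices are index[3] = 0 (bits 47:39), index[2] = 1
         * (bits 38:30), index[1] = 1 (bits 29:21) and index[0] = 0
         * (bits 20:12).
         */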

        /* Allocate page directory pointer table if not present. */
        pml4e = addr_gpa2hva(vm, vm->pgd);
        if (!pml4e[index[3]].present) {
                pml4e[index[3]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pml4e[index[3]].writable = true;
                pml4e[index[3]].present = true;
        }

        /* Allocate page directory table if not present. */
        struct pageDirectoryPointerEntry *pdpe;
        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
        if (!pdpe[index[2]].present) {
                pdpe[index[2]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pdpe[index[2]].writable = true;
                pdpe[index[2]].present = true;
        }

        /* Allocate page table if not present. */
        struct pageDirectoryEntry *pde;
        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
        if (!pde[index[1]].present) {
                pde[index[1]].address = vm_phy_page_alloc(vm,
                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
                        >> vm->page_shift;
                pde[index[1]].writable = true;
                pde[index[1]].present = true;
        }

        /* Fill in page table entry. */
        struct pageTableEntry *pte;
        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
        pte[index[0]].address = paddr >> vm->page_shift;
        pte[index[0]].writable = true;
        pte[index[0]].present = true;
}
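
/*
 * Example use (a sketch, assuming vaddr lies within the VM's valid
 * virtual address range and memslot 0 has room for page tables):
 *
 *   vm_paddr_t paddr = vm_phy_page_alloc(vm,
 *           KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 *   virt_pg_map(vm, vaddr, paddr, 0);
 *   TEST_ASSERT(addr_gva2gpa(vm, vaddr) == paddr,
 *               "Mapping not visible to addr_gva2gpa()");
 */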

/* Virtual Translation Tables Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps to the FILE stream given by stream, the contents of all the
 * virtual translation tables for the VM given by vm.
 */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
        struct pageMapL4Entry *pml4e, *pml4e_start;
        struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
        struct pageDirectoryEntry *pde, *pde_start;
        struct pageTableEntry *pte, *pte_start;

        if (!vm->pgd_created)
                return;

        fprintf(stream, "%*s                                          "
                "                no\n", indent, "");
        fprintf(stream, "%*s      index hvaddr         gpaddr         "
                "addr         w exec dirty\n",
                indent, "");
        pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
                vm->pgd);
        for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
                pml4e = &pml4e_start[n1];
                if (!pml4e->present)
                        continue;
                fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
                        " %u\n",
                        indent, "",
                        pml4e - pml4e_start, pml4e,
                        addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
                        pml4e->writable, pml4e->execute_disable);

                pdpe_start = addr_gpa2hva(vm, pml4e->address
                        * vm->page_size);
                for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
                        pdpe = &pdpe_start[n2];
                        if (!pdpe->present)
                                continue;
                        fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
                                "%u  %u\n",
                                indent, "",
                                pdpe - pdpe_start, pdpe,
                                addr_hva2gpa(vm, pdpe),
                                (uint64_t) pdpe->address, pdpe->writable,
                                pdpe->execute_disable);

                        pde_start = addr_gpa2hva(vm,
                                pdpe->address * vm->page_size);
                        for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
                                pde = &pde_start[n3];
                                if (!pde->present)
                                        continue;
                                fprintf(stream, "%*spde   0x%-3zx %p "
                                        "0x%-12lx 0x%-10lx %u  %u\n",
                                        indent, "", pde - pde_start, pde,
                                        addr_hva2gpa(vm, pde),
                                        (uint64_t) pde->address, pde->writable,
                                        pde->execute_disable);

                                pte_start = addr_gpa2hva(vm,
                                        pde->address * vm->page_size);
                                for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
                                        pte = &pte_start[n4];
                                        if (!pte->present)
                                                continue;
                                        fprintf(stream, "%*spte   0x%-3zx %p "
                                                "0x%-12lx 0x%-10lx %u  %u "
                                                "    %u    0x%-10lx\n",
                                                indent, "",
                                                pte - pte_start, pte,
                                                addr_hva2gpa(vm, pte),
                                                (uint64_t) pte->address,
                                                pte->writable,
                                                pte->execute_disable,
                                                pte->dirty,
                                                ((uint64_t) n1 << 27)
                                                        | ((uint64_t) n2 << 18)
                                                        | ((uint64_t) n3 << 9)
                                                        | ((uint64_t) n4));
                                }
                        }
                }
        }
}

/* Set Unusable Segment
 *
 * Input Args: None
 *
 * Output Args:
 *   segp - Pointer to segment register
 *
 * Return: None
 *
 * Sets the segment register pointed to by segp to an unusable state.
 */
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->unusable = true;
}

static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
        void *gdt = addr_gva2hva(vm, vm->gdt);
        struct desc64 *desc = gdt + (segp->selector >> 3) * 8;

        desc->limit0 = segp->limit & 0xFFFF;
        desc->base0 = segp->base & 0xFFFF;
        desc->base1 = segp->base >> 16;
        desc->s = segp->s;
        desc->type = segp->type;
        desc->dpl = segp->dpl;
        desc->p = segp->present;
        desc->limit1 = segp->limit >> 16;
        desc->l = segp->l;
        desc->db = segp->db;
        desc->g = segp->g;
        desc->base2 = segp->base >> 24;
        if (!segp->s)
                desc->base3 = segp->base >> 32;
}

/* Set Long Mode Flat Kernel Code Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a code segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
        struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->selector = selector;
        segp->limit = 0xFFFFFFFFu;
        segp->s = 0x1; /* kTypeCodeData */
        segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
                                          * | kFlagCodeReadable
                                          */
        segp->g = true;
        segp->l = true;
        segp->present = 1;
        if (vm)
                kvm_seg_fill_gdt_64bit(vm, segp);
}

/* Set Long Mode Flat Kernel Data Segment
 *
 * Input Args:
 *   vm - VM whose GDT is being filled, or NULL to only write segp
 *   selector - selector value
 *
 * Output Args:
 *   segp - Pointer to KVM segment
 *
 * Return: None
 *
 * Sets up the KVM segment pointed to by segp, to be a data segment
 * with the selector value given by selector.
 */
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
        struct kvm_segment *segp)
{
        memset(segp, 0, sizeof(*segp));
        segp->selector = selector;
        segp->limit = 0xFFFFFFFFu;
        segp->s = 0x1; /* kTypeCodeData */
        segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
                                          * | kFlagDataWritable
                                          */
        segp->g = true;
        segp->present = true;
        if (vm)
                kvm_seg_fill_gdt_64bit(vm, segp);
}

/* Address Guest Virtual to Guest Physical
 *
 * Input Args:
 *   vm - Virtual Machine
 *   gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 *   Equivalent VM physical address
 *
 * Translates the VM virtual address given by gva to a VM physical
 * address by walking the VM's virtual translation tables.  A
 * TEST_ASSERT failure occurs if no mapping exists for the VM virtual
 * address.
 */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
        uint16_t index[4];
        struct pageMapL4Entry *pml4e;
        struct pageDirectoryPointerEntry *pdpe;
        struct pageDirectoryEntry *pde;
        struct pageTableEntry *pte;

        TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
                "unknown or unsupported guest mode, mode: 0x%x", vm->mode);

        index[0] = (gva >> 12) & 0x1ffu;
        index[1] = (gva >> 21) & 0x1ffu;
        index[2] = (gva >> 30) & 0x1ffu;
        index[3] = (gva >> 39) & 0x1ffu;

        if (!vm->pgd_created)
                goto unmapped_gva;
        pml4e = addr_gpa2hva(vm, vm->pgd);
        if (!pml4e[index[3]].present)
                goto unmapped_gva;

        pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
        if (!pdpe[index[2]].present)
                goto unmapped_gva;

        pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
        if (!pde[index[1]].present)
                goto unmapped_gva;

        pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
        if (!pte[index[0]].present)
                goto unmapped_gva;

        return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);

unmapped_gva:
        TEST_ASSERT(false, "No mapping for vm virtual address, "
                    "gva: 0x%lx", gva);
        exit(EXIT_FAILURE);
}
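
/*
 * A host pointer to guest-virtual memory can be obtained by chaining
 * the translation helpers, which is essentially what the generic
 * addr_gva2hva() helper does (illustrative):
 *
 *   void *hva = addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
 */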

static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
                          int pgd_memslot)
{
        if (!vm->gdt)
                vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
                        KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

        dt->base = vm->gdt;
        dt->limit = getpagesize();
}

static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
                                int selector, int gdt_memslot,
                                int pgd_memslot)
{
        if (!vm->tss)
                vm->tss = vm_vaddr_alloc(vm, getpagesize(),
                        KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);

        memset(segp, 0, sizeof(*segp));
        segp->base = vm->tss;
        segp->limit = 0x67;
        segp->selector = selector;
        segp->type = 0xb;
        segp->present = 1;
        kvm_seg_fill_gdt_64bit(vm, segp);
}

static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
        struct kvm_sregs sregs;

        /* Set mode specific system register values. */
        vcpu_sregs_get(vm, vcpuid, &sregs);

        sregs.idt.limit = 0;

        kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);

        switch (vm->mode) {
        case VM_MODE_PXXV48_4K:
                sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
                sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
                sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);

                kvm_seg_set_unusable(&sregs.ldt);
                kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
                kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
                kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
                kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
                break;

        default:
                TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
        }

        sregs.cr3 = vm->pgd;
        vcpu_sregs_set(vm, vcpuid, &sregs);
}

/* Adds a vCPU with reasonable defaults (i.e., a stack)
 *
 * Input Args:
 *   vcpuid - The id of the VCPU to add to the VM.
 *   guest_code - The vCPU's entry point
 */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
        vm_vaddr_t stack_vaddr;

        stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
                                     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);

        /* Create VCPU */
        vm_vcpu_add(vm, vcpuid);
        vcpu_setup(vm, vcpuid, 0, 0);

        /* Setup guest general purpose registers */
        vcpu_regs_get(vm, vcpuid, &regs);
        regs.rflags = regs.rflags | 0x2;
        regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
        regs.rip = (unsigned long) guest_code;
        vcpu_regs_set(vm, vcpuid, &regs);

        /* Setup the MP state */
        mp_state.mp_state = 0;
        vcpu_set_mp_state(vm, vcpuid, &mp_state);
}

/* Allocate an instance of struct kvm_cpuid2
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: A pointer to the allocated struct. The caller is responsible
 * for freeing this struct.
 *
 * Since kvm_cpuid2 uses a 0-length array to allow the size of the
 * array to be decided at allocation time, allocation is slightly
 * complicated. This function uses a reasonable default length for
 * the array and performs the appropriate allocation.
 */
static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
{
        struct kvm_cpuid2 *cpuid;
        int nent = 100;
        size_t size;

        size = sizeof(*cpuid);
        size += nent * sizeof(struct kvm_cpuid_entry2);
        cpuid = malloc(size);
        if (!cpuid) {
                perror("malloc");
                abort();
        }

        cpuid->nent = nent;

        return cpuid;
}

/* KVM Supported CPUID Get
 *
 * Input Args: None
 *
 * Output Args: None
 *
 * Return: The supported KVM CPUID
 *
 * Get the guest CPUID supported by KVM.  The result is allocated and
 * initialized on the first call, then cached for all subsequent calls.
 */
struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
        static struct kvm_cpuid2 *cpuid;
        int ret;
        int kvm_fd;

        if (cpuid)
                return cpuid;

        cpuid = allocate_kvm_cpuid2();
        kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
        if (kvm_fd < 0)
                exit(KSFT_SKIP);

        ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
        TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
                    ret, errno);

        close(kvm_fd);
        return cpuid;
}
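
/*
 * Example (an illustrative sketch): test whether KVM exposes XSAVE to
 * guests by checking bit 26 of CPUID.01H:ECX in the supported CPUID.
 *
 *   struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 *   bool has_xsave = entry->ecx & (1 << 26);
 */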

/* Locate a cpuid entry.
 *
 * Input Args:
 *   function: The function of the cpuid entry to find.
 *   index: The index of the cpuid entry to find.
 *
 * Output Args: None
 *
 * Return: A pointer to the cpuid entry. Never returns NULL.
 */
struct kvm_cpuid_entry2 *
kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
{
        struct kvm_cpuid2 *cpuid;
        struct kvm_cpuid_entry2 *entry = NULL;
        int i;

        cpuid = kvm_get_supported_cpuid();
        for (i = 0; i < cpuid->nent; i++) {
                if (cpuid->entries[i].function == function &&
                    cpuid->entries[i].index == index) {
                        entry = &cpuid->entries[i];
                        break;
                }
        }

        TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
                    function, index);
        return entry;
}

/* VM VCPU CPUID Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU id
 *   cpuid - The CPUID values to set.
 *
 * Output Args: None
 *
 * Return: void
 *
 * Set the VCPU's CPUID.
 */
void vcpu_set_cpuid(struct kvm_vm *vm,
                uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        int rc;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);

        rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
        TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
                    rc, errno);
}

/* Create a VM with reasonable defaults
 *
 * Input Args:
 *   vcpuid - The id of the single VCPU to add to the VM.
 *   extra_mem_pages - Number of extra memory pages to add (this also
 *                     decides how much extra space is needed to set up
 *                     the page tables, all within mem slot 0)
 *   guest_code - The vCPU's entry point
 *
 * Output Args: None
 *
 * Return:
 *   Pointer to opaque structure that describes the created VM.
 */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
                                 void *guest_code)
{
        struct kvm_vm *vm;
        /*
         * For x86 the maximum page table size for a memory region
         * will be when only 4K pages are used.  In that case the
         * total extra size for page tables (for extra N pages) will
         * be: N/512+N/512^2+N/512^3+... which is definitely smaller
         * than N/512*2.
         */
        uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

        /* Create VM */
        vm = vm_create(VM_MODE_DEFAULT,
                       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
                       O_RDWR);

        /* Setup guest code */
        kvm_vm_elf_load(vm, program_invocation_name, 0, 0);

        /* Setup IRQ Chip */
        vm_create_irqchip(vm);

        /* Add the first vCPU. */
        vm_vcpu_add_default(vm, vcpuid, guest_code);

        return vm;
}
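
/*
 * Typical use, as a sketch (the guest_code body and the ucall-based
 * GUEST_DONE() are assumptions based on how tests in this directory
 * are usually written):
 *
 *   static void guest_code(void)
 *   {
 *           GUEST_DONE();
 *   }
 *
 *   int main(void)
 *   {
 *           struct kvm_vm *vm = vm_create_default(0, 0, guest_code);
 *
 *           vcpu_run(vm, 0);
 *           kvm_vm_free(vm);
 *           return 0;
 *   }
 */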

/* VCPU Get MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *
 * Output Args: None
 *
 * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
 *
 * Get value of MSR for VCPU.
 */
uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct {
                struct kvm_msrs header;
                struct kvm_msr_entry entry;
        } buffer = {};
        int r;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
        buffer.header.nmsrs = 1;
        buffer.entry.index = msr_index;
        r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
        TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
                "  rc: %i errno: %i", r, errno);

        return buffer.entry.data;
}

/* VCPU Set MSR
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   msr_index - Index of MSR
 *   msr_value - New value of MSR
 *
 * Output Args: None
 *
 * Return: On success, nothing. On failure a TEST_ASSERT is produced.
 *
 * Set value of MSR for VCPU.
 */
void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
        uint64_t msr_value)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct {
                struct kvm_msrs header;
                struct kvm_msr_entry entry;
        } buffer = {};
        int r;

        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
        buffer.header.nmsrs = 1;
        buffer.entry.index = msr_index;
        buffer.entry.data = msr_value;
        r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
        TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
                "  rc: %i errno: %i", r, errno);
}
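
/*
 * Example (an illustrative sketch): read-modify-write of the TSC MSR
 * for vCPU 0; MSR_IA32_TSC is assumed to come from the msr-index
 * definitions pulled in via processor.h.
 *
 *   uint64_t tsc = vcpu_get_msr(vm, 0, MSR_IA32_TSC);
 *   vcpu_set_msr(vm, 0, MSR_IA32_TSC, tsc + 1000000);
 */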

/* VM VCPU Args Set
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   num - number of arguments
 *   ... - arguments, each of type uint64_t
 *
 * Output Args: None
 *
 * Return: None
 *
 * Sets the first num function input arguments to the values
 * given as variable args.  Each of the variable args is expected to
 * be of type uint64_t.
 */
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
        va_list ap;
        struct kvm_regs regs;

        TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
                    "  num: %u\n",
                    num);

        va_start(ap, num);
        vcpu_regs_get(vm, vcpuid, &regs);

        if (num >= 1)
                regs.rdi = va_arg(ap, uint64_t);

        if (num >= 2)
                regs.rsi = va_arg(ap, uint64_t);

        if (num >= 3)
                regs.rdx = va_arg(ap, uint64_t);

        if (num >= 4)
                regs.rcx = va_arg(ap, uint64_t);

        if (num >= 5)
                regs.r8 = va_arg(ap, uint64_t);

        if (num >= 6)
                regs.r9 = va_arg(ap, uint64_t);

        vcpu_regs_set(vm, vcpuid, &regs);
        va_end(ap);
}
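
/*
 * The six supported arguments land in RDI, RSI, RDX, RCX, R8 and R9,
 * matching the x86-64 SysV calling convention, so a guest entry point
 * declared with up to six uint64_t parameters receives them directly.
 * Illustrative use:
 *
 *   static void guest_code(uint64_t a, uint64_t b) { ... }
 *   ...
 *   vcpu_args_set(vm, 0, 2, val_a, val_b);
 */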

/*
 * VM VCPU Dump
 *
 * Input Args:
 *   vm - Virtual Machine
 *   vcpuid - VCPU ID
 *   indent - Left margin indent amount
 *
 * Output Args:
 *   stream - Output FILE stream
 *
 * Return: None
 *
 * Dumps the current state of the VCPU specified by vcpuid, within the VM
 * given by vm, to the FILE stream given by stream.
 */
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
        struct kvm_regs regs;
        struct kvm_sregs sregs;

        fprintf(stream, "%*svcpuid: %u\n", indent, "", vcpuid);

        fprintf(stream, "%*sregs:\n", indent + 2, "");
        vcpu_regs_get(vm, vcpuid, &regs);
        regs_dump(stream, &regs, indent + 4);

        fprintf(stream, "%*ssregs:\n", indent + 2, "");
        vcpu_sregs_get(vm, vcpuid, &sregs);
        sregs_dump(stream, &sregs, indent + 4);
}

struct kvm_x86_state {
        struct kvm_vcpu_events events;
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
        struct kvm_xsave xsave;
        struct kvm_xcrs xcrs;
        struct kvm_sregs sregs;
        struct kvm_debugregs debugregs;
        union {
                struct kvm_nested_state nested;
                char nested_[16384];
        };
        struct kvm_msrs msrs;
};

static int kvm_get_num_msrs(struct kvm_vm *vm)
{
        struct kvm_msr_list nmsrs;
        int r;

        /*
         * Probe for the MSR count: with nmsrs = 0, KVM_GET_MSR_INDEX_LIST
         * fails with E2BIG but writes back the number of indices needed.
         */
        nmsrs.nmsrs = 0;
        r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
        TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
                r);

        return nmsrs.nmsrs;
}

struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct kvm_msr_list *list;
        struct kvm_x86_state *state;
        int nmsrs, r, i;
        static int nested_size = -1;

        if (nested_size == -1) {
                nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
                TEST_ASSERT(nested_size <= sizeof(state->nested_),
                            "Nested state size too big, %i > %zi",
                            nested_size, sizeof(state->nested_));
        }

        /*
         * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees
         * guest state is consistent only after userspace re-enters the
         * kernel with KVM_RUN.  Complete IO prior to migrating state
         * to a new VM.
         */
        vcpu_run_complete_io(vm, vcpuid);

        nmsrs = kvm_get_num_msrs(vm);
        list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
        list->nmsrs = nmsrs;
        r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
                r);

        state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
        r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
                r);

        if (kvm_check_cap(KVM_CAP_XCRS)) {
                r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
                            r);
        }

        r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
                r);

        if (nested_size) {
                state->nested.size = sizeof(state->nested_);
                r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
                        r);
                TEST_ASSERT(state->nested.size <= nested_size,
                        "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
                        state->nested.size, nested_size);
        } else
                state->nested.size = 0;

        state->msrs.nmsrs = nmsrs;
        for (i = 0; i < nmsrs; i++)
                state->msrs.entries[i].index = list->indices[i];
        r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
        TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
                r, r == nmsrs ? -1 : list->indices[r]);

        r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
                r);

        free(list);
        return state;
}

void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
{
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        int r;

        r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
                r);

        if (kvm_check_cap(KVM_CAP_XCRS)) {
                r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
                            r);
        }

        r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
        TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
                r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);

        r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
                r);

        r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
                r);

        if (state->nested.size) {
                r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
                TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
                        r);
        }
}
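
/*
 * Save/restore sketch (illustrative; assumes the destination VM is
 * recreated with matching memory and CPUID before the load):
 *
 *   struct kvm_x86_state *state = vcpu_save_state(vm, 0);
 *   kvm_vm_release(vm);
 *   ... recreate the VM, re-add vCPU 0, restore its CPUID ...
 *   vcpu_load_state(vm, 0, state);
 *   free(state);
 */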

bool is_intel_cpu(void)
{
        int eax, ebx, ecx, edx;
        const uint32_t *chunk;
        const int leaf = 0;

        __asm__ __volatile__(
                "cpuid"
                : /* output */ "=a"(eax), "=b"(ebx),
                  "=c"(ecx), "=d"(edx)
                : /* input */ "0"(leaf), "2"(0));

        chunk = (const uint32_t *)("GenuineIntel");
        return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
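
/*
 * CPUID leaf 0 returns the vendor string in EBX:EDX:ECX order ("Genu",
 * "ineI", "ntel"), which is why the comparison above checks EBX, EDX
 * and ECX against consecutive 32-bit chunks of "GenuineIntel".
 */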

/* Returns the maximum supported leaf of the extended CPUID range (CPUID.80000000H:EAX). */
uint32_t kvm_get_cpuid_max(void)
{
        return kvm_get_supported_cpuid_entry(0x80000000)->eax;
}

void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
{
        struct kvm_cpuid_entry2 *entry;
        bool pae;

        /* SDM 4.1.4 */
        if (kvm_get_cpuid_max() < 0x80000008) {
                /* CPUID.01H:EDX bit 6 is the PAE feature flag. */
                pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
                *pa_bits = pae ? 36 : 32;
                *va_bits = 32;
        } else {
                entry = kvm_get_supported_cpuid_entry(0x80000008);
                *pa_bits = entry->eax & 0xff;
                *va_bits = (entry->eax >> 8) & 0xff;
        }
}
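
/*
 * Worked example (illustrative): a CPU reporting EAX = 0x3028 in leaf
 * 0x80000008 advertises 0x28 = 40 physical and 0x30 = 48 virtual
 * address bits.
 */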