1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include "qemu/osdep.h"
16#include "qapi/qapi-events-run-state.h"
17#include "qapi/error.h"
18#include <sys/ioctl.h>
19#include <sys/utsname.h>
20#include <sys/syscall.h>
21
22#include <linux/kvm.h>
23#include "standard-headers/asm-x86/kvm_para.h"
24
25#include "cpu.h"
26#include "host-cpu.h"
27#include "sysemu/sysemu.h"
28#include "sysemu/hw_accel.h"
29#include "sysemu/kvm_int.h"
30#include "sysemu/runstate.h"
31#include "kvm_i386.h"
32#include "sev.h"
33#include "hyperv.h"
34#include "hyperv-proto.h"
35
36#include "exec/gdbstub.h"
37#include "qemu/host-utils.h"
38#include "qemu/main-loop.h"
39#include "qemu/config-file.h"
40#include "qemu/error-report.h"
41#include "qemu/memalign.h"
42#include "hw/i386/x86.h"
43#include "hw/i386/apic.h"
44#include "hw/i386/apic_internal.h"
45#include "hw/i386/apic-msidef.h"
46#include "hw/i386/intel_iommu.h"
47#include "hw/i386/x86-iommu.h"
48#include "hw/i386/e820_memory_layout.h"
49
50#include "hw/pci/pci.h"
51#include "hw/pci/msi.h"
52#include "hw/pci/msix.h"
53#include "migration/blocker.h"
54#include "exec/memattrs.h"
55#include "trace.h"
56
57
58
/*
 * Debug tracing helper.  When DEBUG_KVM is defined at build time, DPRINTF
 * forwards its printf-style arguments to stderr; otherwise it expands to an
 * empty statement so call sites cost nothing.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
66
67
/*
 * APIC bus timing: length of one bus cycle in nanoseconds, and the bus
 * frequency derived from it (cycles per 10^9 ns, i.e. per second).
 */
#define KVM_APIC_BUS_CYCLE_NS 1
#define KVM_APIC_BUS_FREQUENCY (1000000000ULL / KVM_APIC_BUS_CYCLE_NS)

/*
 * MSR indices for the KVM wall-clock and system-time MSRs.
 * NOTE(review): presumably the legacy paravirtual clock MSR numbers --
 * confirm against the KVM paravirt documentation.
 */
#define MSR_KVM_WALL_CLOCK 0x11
#define MSR_KVM_SYSTEM_TIME 0x12

/*
 * Buffer size in bytes.  NOTE(review): presumably the size of the per-vCPU
 * MSR ioctl buffer; its user is not visible in this part of the file.
 */
#define MSR_BUF_SIZE 4096
77
/* Forward declaration; the definition lives further down in this file. */
static void kvm_init_msrs(X86CPU *cpu);

/*
 * Capability list exported to the generic KVM code, terminated by
 * KVM_CAP_LAST_INFO.  As the name says, these are the capabilities this
 * architecture treats as required: SET_TSS_ADDR, EXT_CPUID and MP_STATE.
 */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_INFO(SET_TSS_ADDR),
    KVM_CAP_INFO(EXT_CPUID),
    KVM_CAP_INFO(MP_STATE),
    KVM_CAP_LAST_INFO
};
86
/*
 * File-scope availability flags, one per MSR or feature the host kernel may
 * or may not offer.  NOTE(review): none of them is assigned in the part of
 * the file reviewed here; they are presumably filled in once during KVM
 * initialisation -- confirm against the init code.
 */
static bool has_msr_star;
static bool has_msr_hsave_pa;
static bool has_msr_tsc_aux;
static bool has_msr_tsc_adjust;
static bool has_msr_tsc_deadline;
static bool has_msr_feature_control;
static bool has_msr_misc_enable;
static bool has_msr_smbase;
static bool has_msr_bndcfgs;
static int lm_capable_kernel;
/* Hyper-V related MSR availability. */
static bool has_msr_hv_hypercall;
static bool has_msr_hv_crash;
static bool has_msr_hv_reset;
static bool has_msr_hv_vpindex;
static bool hv_vpindex_settable;    /* reported by kvm_hv_vpindex_settable() */
static bool has_msr_hv_runtime;
static bool has_msr_hv_synic;
static bool has_msr_hv_stimer;
static bool has_msr_hv_frequencies;
static bool has_msr_hv_reenlightenment;
static bool has_msr_xss;
static bool has_msr_umwait;
static bool has_msr_spec_ctrl;
static bool has_tsc_scale_msr;
static bool has_msr_tsx_ctrl;
static bool has_msr_virt_ssbd;
static bool has_msr_smi_count;
/* Gates CPUID.7.0:EDX ARCH_CAPABILITIES in kvm_arch_get_supported_cpuid(). */
static bool has_msr_arch_capabs;
static bool has_msr_core_capabs;
static bool has_msr_vmx_vmfunc;
static bool has_msr_ucode_rev;
/* Consulted when building the PROCBASED_CTLS2 feature MSR value. */
static bool has_msr_vmx_procbased_ctls2;
static bool has_msr_perf_capabs;
static bool has_msr_pkrs;

/* Architectural PMU description (version and counter counts). */
static uint32_t has_architectural_pmu_version;
static uint32_t num_architectural_pmu_gp_counters;
static uint32_t num_architectural_pmu_fixed_counters;

/* Kernel capability results; stored as int rather than bool. */
static int has_xsave;
static int has_xsave2;
static int has_xcrs;
static int has_pit_state2;          /* reported by kvm_has_pit_state2() */
static int has_sregs2;
static int has_exception_payload;   /* reported by kvm_has_exception_payload() */

static bool has_msr_mcg_ext_ctl;

/* Lazily filled caches; see get_supported_cpuid() and hv_cpuid_get_host(). */
static struct kvm_cpuid2 *cpuid_cache;
static struct kvm_cpuid2 *hv_cpuid_cache;
/* List of feature MSR indices; NULL means none are known. */
static struct kvm_msr_list *kvm_feature_msrs;

/*
 * Bus-lock rate limiting state.
 * NOTE(review): the slice time looks like nanoseconds (10^9 = 1 s) -- confirm.
 */
#define BUS_LOCK_SLICE_TIME 1000000000ULL
static RateLimit bus_lock_ratelimit_ctrl;
141
142int kvm_has_pit_state2(void)
143{
144 return has_pit_state2;
145}
146
147bool kvm_has_smm(void)
148{
149 return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
150}
151
152bool kvm_has_adjust_clock_stable(void)
153{
154 int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
155
156 return (ret == KVM_CLOCK_TSC_STABLE);
157}
158
159bool kvm_has_adjust_clock(void)
160{
161 return kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK);
162}
163
164bool kvm_has_exception_payload(void)
165{
166 return has_exception_payload;
167}
168
169static bool kvm_x2apic_api_set_flags(uint64_t flags)
170{
171 KVMState *s = KVM_STATE(current_accel());
172
173 return !kvm_vm_enable_cap(s, KVM_CAP_X2APIC_API, 0, flags);
174}
175
/*
 * MEMORIZE(fn, _result): evaluate the expression @fn at most once per
 * expansion site and remember its value in the lvalue @_result.
 *
 * The first time the expansion runs it sets a function-local static flag,
 * assigns @fn to @_result and yields that value (statement expression).
 * On every later run it executes `return _result;` -- note that this returns
 * from the *enclosing function*, so the macro may only be used inside a
 * function whose return type is compatible with @_result.
 */
#define MEMORIZE(fn, _result) \
    ({ \
        static bool _memorized; \
        \
        if (_memorized) { \
            return _result; \
        } \
        _memorized = true; \
        _result = fn; \
    })
186
/* Result of the one-time x2APIC API enablement done by kvm_enable_x2apic(). */
static bool has_x2apic_api;

/* Return the recorded x2APIC API state without trying to enable it. */
bool kvm_has_x2apic_api(void)
{
    bool enabled = has_x2apic_api;

    return enabled;
}
193
194bool kvm_enable_x2apic(void)
195{
196 return MEMORIZE(
197 kvm_x2apic_api_set_flags(KVM_X2APIC_API_USE_32BIT_IDS |
198 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK),
199 has_x2apic_api);
200}
201
202bool kvm_hv_vpindex_settable(void)
203{
204 return hv_vpindex_settable;
205}
206
207static int kvm_get_tsc(CPUState *cs)
208{
209 X86CPU *cpu = X86_CPU(cs);
210 CPUX86State *env = &cpu->env;
211 struct {
212 struct kvm_msrs info;
213 struct kvm_msr_entry entries[1];
214 } msr_data = {};
215 int ret;
216
217 if (env->tsc_valid) {
218 return 0;
219 }
220
221 memset(&msr_data, 0, sizeof(msr_data));
222 msr_data.info.nmsrs = 1;
223 msr_data.entries[0].index = MSR_IA32_TSC;
224 env->tsc_valid = !runstate_is_running();
225
226 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
227 if (ret < 0) {
228 return ret;
229 }
230
231 assert(ret == 1);
232 env->tsc = msr_data.entries[0].data;
233 return 0;
234}
235
236static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg)
237{
238 kvm_get_tsc(cpu);
239}
240
241void kvm_synchronize_all_tsc(void)
242{
243 CPUState *cpu;
244
245 if (kvm_enabled()) {
246 CPU_FOREACH(cpu) {
247 run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
248 }
249 }
250}
251
252static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
253{
254 struct kvm_cpuid2 *cpuid;
255 int r, size;
256
257 size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
258 cpuid = g_malloc0(size);
259 cpuid->nent = max;
260 r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
261 if (r == 0 && cpuid->nent >= max) {
262 r = -E2BIG;
263 }
264 if (r < 0) {
265 if (r == -E2BIG) {
266 g_free(cpuid);
267 return NULL;
268 } else {
269 fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
270 strerror(-r));
271 exit(1);
272 }
273 }
274 return cpuid;
275}
276
277
278
279
280static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
281{
282 struct kvm_cpuid2 *cpuid;
283 int max = 1;
284
285 if (cpuid_cache != NULL) {
286 return cpuid_cache;
287 }
288 while ((cpuid = try_get_cpuid(s, max)) == NULL) {
289 max *= 2;
290 }
291 cpuid_cache = cpuid;
292 return cpuid;
293}
294
295static bool host_tsx_broken(void)
296{
297 int family, model, stepping;\
298 char vendor[CPUID_VENDOR_SZ + 1];
299
300 host_cpu_vendor_fms(vendor, &family, &model, &stepping);
301
302
303 return !strcmp(vendor, CPUID_VENDOR_INTEL) &&
304 (family == 6) &&
305 ((model == 63 && stepping < 4) ||
306 model == 60 || model == 69 || model == 70);
307}
308
309
310
311static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
312{
313 uint32_t ret = 0;
314 switch (reg) {
315 case R_EAX:
316 ret = entry->eax;
317 break;
318 case R_EBX:
319 ret = entry->ebx;
320 break;
321 case R_ECX:
322 ret = entry->ecx;
323 break;
324 case R_EDX:
325 ret = entry->edx;
326 break;
327 }
328 return ret;
329}
330
331
332
333static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
334 uint32_t function,
335 uint32_t index)
336{
337 int i;
338 for (i = 0; i < cpuid->nent; ++i) {
339 if (cpuid->entries[i].function == function &&
340 cpuid->entries[i].index == index) {
341 return &cpuid->entries[i];
342 }
343 }
344
345 return NULL;
346}
347
/*
 * Return the bits of CPUID leaf @function / sub-leaf @index, register @reg,
 * that can be exposed to a guest.
 *
 * The starting point is the value reported by the kernel's supported-CPUID
 * table (0 when the leaf is absent).  A chain of leaf-specific fixups then
 * adds bits that are always offered and removes bits whose prerequisites
 * are missing.  Only the first matching branch of the chain is applied.
 */
uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
                                      uint32_t index, int reg)
{
    struct kvm_cpuid2 *cpuid;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;
    uint64_t bitmask;   /* only written/read in the 0xD branch below */

    cpuid = get_supported_cpuid(s);

    struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index);
    if (entry) {
        ret = cpuid_entry_get_reg(entry, reg);
    }

    /* Leaf-specific adjustments on top of what the kernel reported. */
    if (function == 1 && reg == R_EDX) {
        /* These four are always offered, whatever the kernel reported. */
        ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA;
    } else if (function == 1 && reg == R_ECX) {
        /* The hypervisor-present bit is always set. */
        ret |= CPUID_EXT_HYPERVISOR;

        /*
         * TSC deadline timer needs both the in-kernel irqchip and the
         * matching kernel capability.
         */
        if (kvm_irqchip_in_kernel() &&
                kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
            ret |= CPUID_EXT_TSC_DEADLINE_TIMER;
        }

        /* x2APIC is withdrawn when the irqchip is not in the kernel. */
        if (!kvm_irqchip_in_kernel()) {
            ret &= ~CPUID_EXT_X2APIC;
        }

        /* MONITOR is offered only with cpu-pm and MWAIT exits disableable. */
        if (enable_cpu_pm) {
            int disable_exits = kvm_check_extension(s,
                                                    KVM_CAP_X86_DISABLE_EXITS);

            if (disable_exits & KVM_X86_DISABLE_EXITS_MWAIT) {
                ret |= CPUID_EXT_MONITOR;
            }
        }
    } else if (function == 6 && reg == R_EAX) {
        /* ARAT is always offered. */
        ret |= CPUID_6_EAX_ARAT;
    } else if (function == 7 && index == 0 && reg == R_EBX) {
        /* Hide RTM/HLE on hosts flagged by host_tsx_broken(). */
        if (host_tsx_broken()) {
            ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE);
        }
    } else if (function == 7 && index == 0 && reg == R_EDX) {
        /* ARCH_CAPABILITIES is only usable when the MSR itself is available. */
        if (!has_msr_arch_capabs) {
            ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
        }
    } else if (function == 0xd && index == 0 &&
               (reg == R_EAX || reg == R_EDX)) {
        /*
         * For CPUID.0xD.0 EAX/EDX, prefer the 64-bit mask returned by the
         * KVM_X86_XCOMP_GUEST_SUPP system attribute over the table value.
         * EAX receives the low 32 bits, EDX the high 32 bits.  When the
         * attribute interface is unavailable or the query fails, the table
         * value is returned unchanged.
         */
        struct kvm_device_attr attr = {
            .group = 0,
            .attr = KVM_X86_XCOMP_GUEST_SUPP,
            .addr = (unsigned long) &bitmask
        };

        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
        if (!sys_attr) {
            return ret;
        }

        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
        if (rc < 0) {
            /* -ENXIO is tolerated silently; other errors get a warning. */
            if (rc != -ENXIO) {
                warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
                            "error: %d", rc);
            }
            return ret;
        }
        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
    } else if (function == 0x80000001 && reg == R_ECX) {
        /* TOPOEXT is always offered. */
        ret |= CPUID_EXT3_TOPOEXT;
    } else if (function == 0x80000001 && reg == R_EDX) {
        /*
         * Mirror the AMD alias bits from the (already fixed-up) leaf 1 EDX
         * value obtained through a recursive call.
         */
        cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
        ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
    } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) {
        /* PV_UNHALT is withdrawn without an in-kernel irqchip. */
        if (!kvm_irqchip_in_kernel()) {
            ret &= ~(1U << KVM_FEATURE_PV_UNHALT);
        }
        /* MSI extended destination IDs are offered with a split irqchip. */
        if (kvm_irqchip_is_split()) {
            ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID;
        }
    } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) {
        /* The realtime hint is always offered. */
        ret |= 1U << KVM_HINTS_REALTIME;
    }

    return ret;
}
469
470uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index)
471{
472 struct {
473 struct kvm_msrs info;
474 struct kvm_msr_entry entries[1];
475 } msr_data = {};
476 uint64_t value;
477 uint32_t ret, can_be_one, must_be_one;
478
479 if (kvm_feature_msrs == NULL) {
480 return 0;
481 }
482
483
484 int i;
485 for (i = 0; i < kvm_feature_msrs->nmsrs; i++)
486 if (kvm_feature_msrs->indices[i] == index) {
487 break;
488 }
489 if (i == kvm_feature_msrs->nmsrs) {
490 return 0;
491 }
492
493 msr_data.info.nmsrs = 1;
494 msr_data.entries[0].index = index;
495
496 ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data);
497 if (ret != 1) {
498 error_report("KVM get MSR (index=0x%x) feature failed, %s",
499 index, strerror(-ret));
500 exit(1);
501 }
502
503 value = msr_data.entries[0].data;
504 switch (index) {
505 case MSR_IA32_VMX_PROCBASED_CTLS2:
506 if (!has_msr_vmx_procbased_ctls2) {
507
508 if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) &
509 CPUID_XSAVE_XSAVES) {
510 value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32;
511 }
512 if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) &
513 CPUID_EXT_RDRAND) {
514 value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32;
515 }
516 if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
517 CPUID_7_0_EBX_INVPCID) {
518 value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32;
519 }
520 if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) &
521 CPUID_7_0_EBX_RDSEED) {
522 value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32;
523 }
524 if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) &
525 CPUID_EXT2_RDTSCP) {
526 value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32;
527 }
528 }
529
530 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
531 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
532 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
533 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
534
535
536
537
538
539 must_be_one = (uint32_t)value;
540 can_be_one = (uint32_t)(value >> 32);
541 return can_be_one & ~must_be_one;
542
543 default:
544 return value;
545 }
546}
547
548static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap,
549 int *max_banks)
550{
551 int r;
552
553 r = kvm_check_extension(s, KVM_CAP_MCE);
554 if (r > 0) {
555 *max_banks = r;
556 return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
557 }
558 return -ENOSYS;
559}
560
561static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
562{
563 CPUState *cs = CPU(cpu);
564 CPUX86State *env = &cpu->env;
565 uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
566 MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
567 uint64_t mcg_status = MCG_STATUS_MCIP;
568 int flags = 0;
569
570 if (code == BUS_MCEERR_AR) {
571 status |= MCI_STATUS_AR | 0x134;
572 mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV;
573 } else {
574 status |= 0xc0;
575 mcg_status |= MCG_STATUS_RIPV;
576 }
577
578 flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
579
580
581
582 cpu_synchronize_state(cs);
583 if (env->mcg_ext_ctl & MCG_EXT_CTL_LMCE_EN) {
584 mcg_status |= MCG_STATUS_LMCE;
585 flags = 0;
586 }
587
588 cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
589 (MCM_ADDR_PHYS << 6) | 0xc, flags);
590}
591
592static void emit_hypervisor_memory_failure(MemoryFailureAction action, bool ar)
593{
594 MemoryFailureFlags mff = {.action_required = ar, .recursive = false};
595
596 qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_HYPERVISOR, action,
597 &mff);
598}
599
600static void hardware_memory_error(void *host_addr)
601{
602 emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_FATAL, true);
603 error_report("QEMU got Hardware memory error at addr %p", host_addr);
604 exit(1);
605}
606
/*
 * Handle a SIGBUS memory error delivered for vCPU @c.
 *
 * @code must be BUS_MCEERR_AR or BUS_MCEERR_AO (asserted); @addr is the
 * faulting host address.
 *
 * If the guest advertises MCG_SER_P and @addr maps to guest RAM, the page
 * is recorded as hw-poisoned, a machine check is injected into the guest
 * and the function returns.  Otherwise an AR error is fatal for QEMU
 * (hardware_memory_error() exits), while an AO error is reported to
 * management as ignored.
 */
void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    X86CPU *cpu = X86_CPU(c);
    CPUX86State *env = &cpu->env;
    ram_addr_t ram_addr;
    hwaddr paddr;

    /* Only these two codes are expected here. */
    assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);

    if ((env->mcg_cap & MCG_SER_P) && addr) {
        ram_addr = qemu_ram_addr_from_host(addr);
        /* Both translations must succeed for the error to be guest-owned. */
        if (ram_addr != RAM_ADDR_INVALID &&
            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            kvm_hwpoison_page_add(ram_addr);
            kvm_mce_inject(cpu, paddr, code);

            /*
             * Log at error level for action-required faults and at warning
             * level for action-optional ones.
             */
            if (code == BUS_MCEERR_AR) {
                error_report("Guest MCE Memory Error at QEMU addr %p and "
                    "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
                    addr, paddr, "BUS_MCEERR_AR");
            } else {
                 warn_report("Guest MCE Memory Error at QEMU addr %p and "
                     "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
                     addr, paddr, "BUS_MCEERR_AO");
            }

            return;
        }

        /* The address did not translate to guest memory. */
        if (code == BUS_MCEERR_AO) {
            warn_report("Hardware memory error at addr %p of type %s "
                "for memory used by QEMU itself instead of guest system!",
                 addr, "BUS_MCEERR_AO");
        }
    }

    /* An action-required error that could not be forwarded is fatal. */
    if (code == BUS_MCEERR_AR) {
        hardware_memory_error(addr);
    }

    /* Reached only for AO errors that were not injected: report as ignored. */
    emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, false);
}
660
661static void kvm_reset_exception(CPUX86State *env)
662{
663 env->exception_nr = -1;
664 env->exception_pending = 0;
665 env->exception_injected = 0;
666 env->exception_has_payload = false;
667 env->exception_payload = 0;
668}
669
670static void kvm_queue_exception(CPUX86State *env,
671 int32_t exception_nr,
672 uint8_t exception_has_payload,
673 uint64_t exception_payload)
674{
675 assert(env->exception_nr == -1);
676 assert(!env->exception_pending);
677 assert(!env->exception_injected);
678 assert(!env->exception_has_payload);
679
680 env->exception_nr = exception_nr;
681
682 if (has_exception_payload) {
683 env->exception_pending = 1;
684
685 env->exception_has_payload = exception_has_payload;
686 env->exception_payload = exception_payload;
687 } else {
688 env->exception_injected = 1;
689
690 if (exception_nr == EXCP01_DB) {
691 assert(exception_has_payload);
692 env->dr[6] = exception_payload;
693 } else if (exception_nr == EXCP0E_PAGE) {
694 assert(exception_has_payload);
695 env->cr[2] = exception_payload;
696 } else {
697 assert(!exception_has_payload);
698 }
699 }
700}
701
702static int kvm_inject_mce_oldstyle(X86CPU *cpu)
703{
704 CPUX86State *env = &cpu->env;
705
706 if (!kvm_has_vcpu_events() && env->exception_nr == EXCP12_MCHK) {
707 unsigned int bank, bank_num = env->mcg_cap & 0xff;
708 struct kvm_x86_mce mce;
709
710 kvm_reset_exception(env);
711
712
713
714
715
716 for (bank = 0; bank < bank_num; bank++) {
717 if (env->mce_banks[bank * 4 + 1] & MCI_STATUS_VAL) {
718 break;
719 }
720 }
721 assert(bank < bank_num);
722
723 mce.bank = bank;
724 mce.status = env->mce_banks[bank * 4 + 1];
725 mce.mcg_status = env->mcg_status;
726 mce.addr = env->mce_banks[bank * 4 + 2];
727 mce.misc = env->mce_banks[bank * 4 + 3];
728
729 return kvm_vcpu_ioctl(CPU(cpu), KVM_X86_SET_MCE, &mce);
730 }
731 return 0;
732}
733
734static void cpu_update_state(void *opaque, bool running, RunState state)
735{
736 CPUX86State *env = opaque;
737
738 if (running) {
739 env->tsc_valid = false;
740 }
741}
742
743unsigned long kvm_arch_vcpu_id(CPUState *cs)
744{
745 X86CPU *cpu = X86_CPU(cs);
746 return cpu->apic_id;
747}
748
/*
 * Fallback definition for builds whose headers do not provide
 * KVM_CPUID_SIGNATURE_NEXT.
 */
#ifndef KVM_CPUID_SIGNATURE_NEXT
#define KVM_CPUID_SIGNATURE_NEXT 0x40000100
#endif
752
753static bool hyperv_enabled(X86CPU *cpu)
754{
755 return kvm_check_extension(kvm_state, KVM_CAP_HYPERV) > 0 &&
756 ((cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_NOTIFY) ||
757 cpu->hyperv_features || cpu->hyperv_passthrough);
758}
759
760
761
762
763
/*
 * Check whether @target_freq lies within +/-250 parts per million of @freq
 * (both inclusive).  The tolerance is freq * 250 / 1000000, truncated
 * toward zero.
 *
 * The arithmetic is done in 64 bits: with plain int, freq * 250 overflows
 * for frequencies above roughly 8.59 million, which is undefined behaviour.
 */
static inline bool freq_within_bounds(int freq, int target_freq)
{
    int64_t tolerance = (int64_t)freq * 250 / 1000000;
    int64_t max_freq = (int64_t)freq + tolerance;
    int64_t min_freq = (int64_t)freq - tolerance;

    return target_freq >= min_freq && target_freq <= max_freq;
}
775
/*
 * Apply the configured guest TSC frequency (env->tsc_khz) to vCPU @cs.
 *
 * Returns 0 when no frequency is configured, when KVM_SET_TSC_KHZ succeeds,
 * or when setting failed but the vCPU already runs at exactly the requested
 * frequency.  Otherwise a warning is printed and the negative result of the
 * set attempt (or -ENOTSUP when no attempt was made) is returned.
 */
static int kvm_arch_set_tsc_khz(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    int r, cur_freq;
    bool set_ioctl = false;

    /* Nothing requested. */
    if (!env->tsc_khz) {
        return 0;
    }

    /* Current vCPU TSC frequency, or -ENOTSUP if it cannot be queried. */
    cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
               kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : -ENOTSUP;

    /* Try to set the frequency when the kernel reports TSC control ... */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL)) {
        set_ioctl = true;
    }

    /*
     * ... or when the requested frequency is within the tolerance accepted
     * by freq_within_bounds() around the current one.
     */
    if (cur_freq != -ENOTSUP && freq_within_bounds(cur_freq, env->tsc_khz)) {
        set_ioctl = true;
    }

    r = set_ioctl ?
        kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz) :
        -ENOTSUP;

    if (r < 0) {
        /*
         * Setting failed or was not attempted.  Re-read the frequency: an
         * exact match with the request is still acceptable.
         */
        cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
                   kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
                   -ENOTSUP;
        if (cur_freq <= 0 || cur_freq != env->tsc_khz) {
            warn_report("TSC frequency mismatch between "
                        "VM (%" PRId64 " kHz) and host (%d kHz), "
                        "and TSC scaling unavailable",
                        env->tsc_khz, cur_freq);
            return r;
        }
    }

    return 0;
}
827
828static bool tsc_is_stable_and_known(CPUX86State *env)
829{
830 if (!env->tsc_khz) {
831 return false;
832 }
833 return (env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC)
834 || env->user_tsc_khz;
835}
836
/*
 * Table describing every Hyper-V enlightenment, indexed by HYPERV_FEAT_*.
 *
 *  desc         - human readable name used in error messages
 *  flags        - up to two (CPUID leaf, register, bit mask) triples; all
 *                 listed bits must be present on the host for the feature
 *                 to count as supported, and they are the bits exposed to
 *                 the guest when it is enabled.  An entry with func == 0 is
 *                 unused.
 *  dependencies - bitmap of HYPERV_FEAT_* that must also be enabled
 */
static struct {
    const char *desc;
    struct {
        uint32_t func;
        int reg;
        uint32_t bits;
    } flags[2];
    uint64_t dependencies;
} kvm_hyperv_properties[] = {
    [HYPERV_FEAT_RELAXED] = {
        .desc = "relaxed timing (hv-relaxed)",
        .flags = {
            {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
             .bits = HV_RELAXED_TIMING_RECOMMENDED}
        }
    },
    [HYPERV_FEAT_VAPIC] = {
        .desc = "virtual APIC (hv-vapic)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_APIC_ACCESS_AVAILABLE}
        }
    },
    [HYPERV_FEAT_TIME] = {
        .desc = "clocksources (hv-time)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_TIME_REF_COUNT_AVAILABLE | HV_REFERENCE_TSC_AVAILABLE}
        }
    },
    [HYPERV_FEAT_CRASH] = {
        .desc = "crash MSRs (hv-crash)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EDX,
             .bits = HV_GUEST_CRASH_MSR_AVAILABLE}
        }
    },
    [HYPERV_FEAT_RESET] = {
        .desc = "reset MSR (hv-reset)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_RESET_AVAILABLE}
        }
    },
    [HYPERV_FEAT_VPINDEX] = {
        .desc = "VP_INDEX MSR (hv-vpindex)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_VP_INDEX_AVAILABLE}
        }
    },
    [HYPERV_FEAT_RUNTIME] = {
        .desc = "VP_RUNTIME MSR (hv-runtime)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_VP_RUNTIME_AVAILABLE}
        }
    },
    [HYPERV_FEAT_SYNIC] = {
        .desc = "synthetic interrupt controller (hv-synic)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_SYNIC_AVAILABLE}
        }
    },
    [HYPERV_FEAT_STIMER] = {
        .desc = "synthetic timers (hv-stimer)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_SYNTIMERS_AVAILABLE}
        },
        .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_TIME)
    },
    /* The only feature that needs bits in two different registers. */
    [HYPERV_FEAT_FREQUENCIES] = {
        .desc = "frequency MSRs (hv-frequencies)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_ACCESS_FREQUENCY_MSRS},
            {.func = HV_CPUID_FEATURES, .reg = R_EDX,
             .bits = HV_FREQUENCY_MSRS_AVAILABLE}
        }
    },
    [HYPERV_FEAT_REENLIGHTENMENT] = {
        .desc = "reenlightenment MSRs (hv-reenlightenment)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EAX,
             .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL}
        }
    },
    [HYPERV_FEAT_TLBFLUSH] = {
        .desc = "paravirtualized TLB flush (hv-tlbflush)",
        .flags = {
            {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
             .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED |
             HV_EX_PROCESSOR_MASKS_RECOMMENDED}
        },
        .dependencies = BIT(HYPERV_FEAT_VPINDEX)
    },
    [HYPERV_FEAT_EVMCS] = {
        .desc = "enlightened VMCS (hv-evmcs)",
        .flags = {
            {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
             .bits = HV_ENLIGHTENED_VMCS_RECOMMENDED}
        },
        .dependencies = BIT(HYPERV_FEAT_VAPIC)
    },
    [HYPERV_FEAT_IPI] = {
        .desc = "paravirtualized IPI (hv-ipi)",
        .flags = {
            {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
             .bits = HV_CLUSTER_IPI_RECOMMENDED |
             HV_EX_PROCESSOR_MASKS_RECOMMENDED}
        },
        .dependencies = BIT(HYPERV_FEAT_VPINDEX)
    },
    [HYPERV_FEAT_STIMER_DIRECT] = {
        .desc = "direct mode synthetic timers (hv-stimer-direct)",
        .flags = {
            {.func = HV_CPUID_FEATURES, .reg = R_EDX,
             .bits = HV_STIMER_DIRECT_MODE_AVAILABLE}
        },
        .dependencies = BIT(HYPERV_FEAT_STIMER)
    },
    [HYPERV_FEAT_AVIC] = {
        .desc = "AVIC/APICv support (hv-avic/hv-apicv)",
        .flags = {
            {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX,
             .bits = HV_DEPRECATING_AEOI_RECOMMENDED}
        }
    },
};
968
969static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max,
970 bool do_sys_ioctl)
971{
972 struct kvm_cpuid2 *cpuid;
973 int r, size;
974
975 size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
976 cpuid = g_malloc0(size);
977 cpuid->nent = max;
978
979 if (do_sys_ioctl) {
980 r = kvm_ioctl(kvm_state, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
981 } else {
982 r = kvm_vcpu_ioctl(cs, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
983 }
984 if (r == 0 && cpuid->nent >= max) {
985 r = -E2BIG;
986 }
987 if (r < 0) {
988 if (r == -E2BIG) {
989 g_free(cpuid);
990 return NULL;
991 } else {
992 fprintf(stderr, "KVM_GET_SUPPORTED_HV_CPUID failed: %s\n",
993 strerror(-r));
994 exit(1);
995 }
996 }
997 return cpuid;
998}
999
1000
1001
1002
1003
1004static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs)
1005{
1006 struct kvm_cpuid2 *cpuid;
1007
1008 int max = 10;
1009 int i;
1010 bool do_sys_ioctl;
1011
1012 do_sys_ioctl =
1013 kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID) > 0;
1014
1015
1016
1017
1018
1019 assert(do_sys_ioctl || cs->kvm_state);
1020
1021
1022
1023
1024
1025
1026 while ((cpuid = try_get_hv_cpuid(cs, max, do_sys_ioctl)) == NULL) {
1027 max++;
1028 }
1029
1030
1031
1032
1033
1034
1035
1036 if (!do_sys_ioctl && kvm_check_extension(cs->kvm_state,
1037 KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
1038 for (i = 0; i < cpuid->nent; i++) {
1039 if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) {
1040 cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
1041 }
1042 }
1043 }
1044
1045 return cpuid;
1046}
1047
1048
1049
1050
1051
/*
 * Build a minimal Hyper-V CPUID table by hand, for use when the kernel
 * cannot report one itself (see hv_cpuid_get_host()).
 *
 * The table has exactly two entries, HV_CPUID_FEATURES and
 * HV_CPUID_ENLIGHTMENT_INFO, whose bits are derived from individual KVM
 * capabilities and from the has_msr_hv_* flags.  The caller owns the
 * returned allocation.
 */
static struct kvm_cpuid2 *get_supported_hv_cpuid_legacy(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    struct kvm_cpuid2 *cpuid;
    struct kvm_cpuid_entry2 *entry_feat, *entry_recomm;

    /* Zero-filled, so every bit starts out cleared. */
    cpuid = g_malloc0(sizeof(*cpuid) + 2 * sizeof(*cpuid->entries));
    cpuid->nent = 2;

    /* Entry 0: feature identification. */
    entry_feat = &cpuid->entries[0];
    entry_feat->function = HV_CPUID_FEATURES;

    /* Entry 1: recommendations; EBX carries the spinlock retry count. */
    entry_recomm = &cpuid->entries[1];
    entry_recomm->function = HV_CPUID_ENLIGHTMENT_INFO;
    entry_recomm->ebx = cpu->hyperv_spinlock_attempts;

    /* Basic Hyper-V support implies these bits. */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0) {
        entry_feat->eax |= HV_HYPERCALL_AVAILABLE;
        entry_feat->eax |= HV_APIC_ACCESS_AVAILABLE;
        entry_feat->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
        entry_recomm->eax |= HV_RELAXED_TIMING_RECOMMENDED;
        entry_recomm->eax |= HV_APIC_ACCESS_RECOMMENDED;
    }

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) {
        entry_feat->eax |= HV_TIME_REF_COUNT_AVAILABLE;
        entry_feat->eax |= HV_REFERENCE_TSC_AVAILABLE;
    }

    /* The remaining feature bits follow the MSR availability flags. */
    if (has_msr_hv_frequencies) {
        entry_feat->eax |= HV_ACCESS_FREQUENCY_MSRS;
        entry_feat->edx |= HV_FREQUENCY_MSRS_AVAILABLE;
    }

    if (has_msr_hv_crash) {
        entry_feat->edx |= HV_GUEST_CRASH_MSR_AVAILABLE;
    }

    if (has_msr_hv_reenlightenment) {
        entry_feat->eax |= HV_ACCESS_REENLIGHTENMENTS_CONTROL;
    }

    if (has_msr_hv_reset) {
        entry_feat->eax |= HV_RESET_AVAILABLE;
    }

    if (has_msr_hv_vpindex) {
        entry_feat->eax |= HV_VP_INDEX_AVAILABLE;
    }

    if (has_msr_hv_runtime) {
        entry_feat->eax |= HV_VP_RUNTIME_AVAILABLE;
    }

    if (has_msr_hv_synic) {
        /* Which SynIC capability to require depends on the CPU setting. */
        unsigned int cap = cpu->hyperv_synic_kvm_only ?
            KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2;

        if (kvm_check_extension(cs->kvm_state, cap) > 0) {
            entry_feat->eax |= HV_SYNIC_AVAILABLE;
        }
    }

    if (has_msr_hv_stimer) {
        entry_feat->eax |= HV_SYNTIMERS_AVAILABLE;
    }

    /* Recommendation bits backed by dedicated KVM capabilities. */
    if (kvm_check_extension(cs->kvm_state,
                            KVM_CAP_HYPERV_TLBFLUSH) > 0) {
        entry_recomm->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED;
        entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
    }

    if (kvm_check_extension(cs->kvm_state,
                            KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) {
        entry_recomm->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
    }

    if (kvm_check_extension(cs->kvm_state,
                            KVM_CAP_HYPERV_SEND_IPI) > 0) {
        entry_recomm->eax |= HV_CLUSTER_IPI_RECOMMENDED;
        entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
    }

    return cpuid;
}
1140
1141static uint32_t hv_cpuid_get_host(CPUState *cs, uint32_t func, int reg)
1142{
1143 struct kvm_cpuid_entry2 *entry;
1144 struct kvm_cpuid2 *cpuid;
1145
1146 if (hv_cpuid_cache) {
1147 cpuid = hv_cpuid_cache;
1148 } else {
1149 if (kvm_check_extension(kvm_state, KVM_CAP_HYPERV_CPUID) > 0) {
1150 cpuid = get_supported_hv_cpuid(cs);
1151 } else {
1152
1153
1154
1155
1156
1157
1158 assert(cs->kvm_state);
1159
1160 cpuid = get_supported_hv_cpuid_legacy(cs);
1161 }
1162 hv_cpuid_cache = cpuid;
1163 }
1164
1165 if (!cpuid) {
1166 return 0;
1167 }
1168
1169 entry = cpuid_find_entry(cpuid, func, 0);
1170 if (!entry) {
1171 return 0;
1172 }
1173
1174 return cpuid_entry_get_reg(entry, reg);
1175}
1176
1177static bool hyperv_feature_supported(CPUState *cs, int feature)
1178{
1179 uint32_t func, bits;
1180 int i, reg;
1181
1182 for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties[feature].flags); i++) {
1183
1184 func = kvm_hyperv_properties[feature].flags[i].func;
1185 reg = kvm_hyperv_properties[feature].flags[i].reg;
1186 bits = kvm_hyperv_properties[feature].flags[i].bits;
1187
1188 if (!func) {
1189 continue;
1190 }
1191
1192 if ((hv_cpuid_get_host(cs, func, reg) & bits) != bits) {
1193 return false;
1194 }
1195 }
1196
1197 return true;
1198}
1199
1200
1201static bool hv_feature_check_deps(X86CPU *cpu, int feature, Error **errp)
1202{
1203 uint64_t deps;
1204 int dep_feat;
1205
1206 deps = kvm_hyperv_properties[feature].dependencies;
1207 while (deps) {
1208 dep_feat = ctz64(deps);
1209 if (!(hyperv_feat_enabled(cpu, dep_feat))) {
1210 error_setg(errp, "Hyper-V %s requires Hyper-V %s",
1211 kvm_hyperv_properties[feature].desc,
1212 kvm_hyperv_properties[dep_feat].desc);
1213 return false;
1214 }
1215 deps &= ~(1ull << dep_feat);
1216 }
1217
1218 return true;
1219}
1220
1221static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg)
1222{
1223 X86CPU *cpu = X86_CPU(cs);
1224 uint32_t r = 0;
1225 int i, j;
1226
1227 for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties); i++) {
1228 if (!hyperv_feat_enabled(cpu, i)) {
1229 continue;
1230 }
1231
1232 for (j = 0; j < ARRAY_SIZE(kvm_hyperv_properties[i].flags); j++) {
1233 if (kvm_hyperv_properties[i].flags[j].func != func) {
1234 continue;
1235 }
1236 if (kvm_hyperv_properties[i].flags[j].reg != reg) {
1237 continue;
1238 }
1239
1240 r |= kvm_hyperv_properties[i].flags[j].bits;
1241 }
1242 }
1243
1244 return r;
1245}
1246
1247
1248
1249
1250
1251
1252
1253
/*
 * Expand and validate the Hyper-V configuration of @cpu.
 *
 * In passthrough mode the vendor id, interface id, version, limits and
 * spinlock retry count are copied from the host's Hyper-V CPUID leaves and
 * every host-supported feature is switched on.  Otherwise each feature the
 * user enabled is checked for kernel support and for its dependencies.
 *
 * Returns true on success or when there is nothing to do; returns false
 * with @errp set when a feature is unsupported or a dependency is missing.
 */
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp)
{
    CPUState *cs = CPU(cpu);
    Error *local_err = NULL;
    int feat;

    /* Nothing to expand when Hyper-V emulation is not in use. */
    if (!hyperv_enabled(cpu))
        return true;

    /*
     * Without a per-vCPU kvm_state the host table can only be read through
     * the system-wide ioctl; if that is unavailable too, succeed without
     * doing anything.
     * NOTE(review): presumably the expansion is repeated once the vCPU
     * exists -- confirm against the callers.
     */
    if (!cs->kvm_state &&
        !kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID))
        return true;

    if (cpu->hyperv_passthrough) {
        /* Vendor id: EBX/ECX/EDX of the vendor leaf, also kept as a string. */
        cpu->hyperv_vendor_id[0] =
            hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX);
        cpu->hyperv_vendor_id[1] =
            hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_ECX);
        cpu->hyperv_vendor_id[2] =
            hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EDX);
        cpu->hyperv_vendor = g_realloc(cpu->hyperv_vendor,
                                       sizeof(cpu->hyperv_vendor_id) + 1);
        memcpy(cpu->hyperv_vendor, cpu->hyperv_vendor_id,
               sizeof(cpu->hyperv_vendor_id));
        /* NUL-terminate the string copy. */
        cpu->hyperv_vendor[sizeof(cpu->hyperv_vendor_id)] = 0;

        /* Interface id: all four registers of the interface leaf. */
        cpu->hyperv_interface_id[0] =
            hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EAX);
        cpu->hyperv_interface_id[1] =
            hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EBX);
        cpu->hyperv_interface_id[2] =
            hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_ECX);
        cpu->hyperv_interface_id[3] =
            hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EDX);

        /*
         * Version leaf: EAX = build, EBX = major (high 16 bits) and minor
         * (low 16 bits), ECX = sp, EDX = sb (top 8 bits) and sn (low 24).
         */
        cpu->hyperv_ver_id_build =
            hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EAX);
        cpu->hyperv_ver_id_major =
            hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) >> 16;
        cpu->hyperv_ver_id_minor =
            hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) & 0xffff;
        cpu->hyperv_ver_id_sp =
            hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_ECX);
        cpu->hyperv_ver_id_sb =
            hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) >> 24;
        cpu->hyperv_ver_id_sn =
            hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) & 0xffffff;

        /* Implementation limits leaf. */
        cpu->hv_max_vps = hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS,
                                            R_EAX);
        cpu->hyperv_limits[0] =
            hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EBX);
        cpu->hyperv_limits[1] =
            hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_ECX);
        cpu->hyperv_limits[2] =
            hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EDX);

        cpu->hyperv_spinlock_attempts =
            hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EBX);

        /*
         * Turn on every feature the host supports.  Dependencies are not
         * checked on this path.
         */
        for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
            if (hyperv_feature_supported(cs, feat)) {
                cpu->hyperv_features |= BIT(feat);
            }
        }
    } else {
        /* Validate only what the user explicitly enabled. */
        for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) {
            /* Features that are off need no checking. */
            if (!hyperv_feat_enabled(cpu, feat)) {
                continue;
            }

            /* The kernel must offer every CPUID bit the feature needs. */
            if (!hyperv_feature_supported(cs, feat)) {
                error_setg(errp, "Hyper-V %s is not supported by kernel",
                           kvm_hyperv_properties[feat].desc);
                return false;
            }

            /* All features it depends on must be enabled as well. */
            if (!hv_feature_check_deps(cpu, feat, &local_err)) {
                error_propagate(errp, local_err);
                return false;
            }
        }
    }

    /*
     * Extra rule checked in both modes: SynIC needs VP_INDEX unless the
     * kvm-only SynIC variant is selected.
     */
    if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) &&
        !cpu->hyperv_synic_kvm_only &&
        !hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)) {
        error_setg(errp, "Hyper-V %s requires Hyper-V %s",
                   kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc,
                   kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc);
        return false;
    }

    return true;
}
1363
1364
1365
1366
1367static int hyperv_fill_cpuids(CPUState *cs,
1368 struct kvm_cpuid_entry2 *cpuid_ent)
1369{
1370 X86CPU *cpu = X86_CPU(cs);
1371 struct kvm_cpuid_entry2 *c;
1372 uint32_t cpuid_i = 0;
1373
1374 c = &cpuid_ent[cpuid_i++];
1375 c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
1376 c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ?
1377 HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
1378 c->ebx = cpu->hyperv_vendor_id[0];
1379 c->ecx = cpu->hyperv_vendor_id[1];
1380 c->edx = cpu->hyperv_vendor_id[2];
1381
1382 c = &cpuid_ent[cpuid_i++];
1383 c->function = HV_CPUID_INTERFACE;
1384 c->eax = cpu->hyperv_interface_id[0];
1385 c->ebx = cpu->hyperv_interface_id[1];
1386 c->ecx = cpu->hyperv_interface_id[2];
1387 c->edx = cpu->hyperv_interface_id[3];
1388
1389 c = &cpuid_ent[cpuid_i++];
1390 c->function = HV_CPUID_VERSION;
1391 c->eax = cpu->hyperv_ver_id_build;
1392 c->ebx = (uint32_t)cpu->hyperv_ver_id_major << 16 |
1393 cpu->hyperv_ver_id_minor;
1394 c->ecx = cpu->hyperv_ver_id_sp;
1395 c->edx = (uint32_t)cpu->hyperv_ver_id_sb << 24 |
1396 (cpu->hyperv_ver_id_sn & 0xffffff);
1397
1398 c = &cpuid_ent[cpuid_i++];
1399 c->function = HV_CPUID_FEATURES;
1400 c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EAX);
1401 c->ebx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EBX);
1402 c->edx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EDX);
1403
1404
1405 c->eax |= HV_HYPERCALL_AVAILABLE;
1406
1407
1408 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) &&
1409 !cpu->hyperv_synic_kvm_only) {
1410 c->ebx |= HV_POST_MESSAGES | HV_SIGNAL_EVENTS;
1411 }
1412
1413
1414
1415 c->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE;
1416
1417 c = &cpuid_ent[cpuid_i++];
1418 c->function = HV_CPUID_ENLIGHTMENT_INFO;
1419 c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX);
1420 c->ebx = cpu->hyperv_spinlock_attempts;
1421
1422 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) &&
1423 !hyperv_feat_enabled(cpu, HYPERV_FEAT_AVIC)) {
1424 c->eax |= HV_APIC_ACCESS_RECOMMENDED;
1425 }
1426
1427 if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_ON) {
1428 c->eax |= HV_NO_NONARCH_CORESHARING;
1429 } else if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO) {
1430 c->eax |= hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX) &
1431 HV_NO_NONARCH_CORESHARING;
1432 }
1433
1434 c = &cpuid_ent[cpuid_i++];
1435 c->function = HV_CPUID_IMPLEMENT_LIMITS;
1436 c->eax = cpu->hv_max_vps;
1437 c->ebx = cpu->hyperv_limits[0];
1438 c->ecx = cpu->hyperv_limits[1];
1439 c->edx = cpu->hyperv_limits[2];
1440
1441 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
1442 uint32_t function;
1443
1444
1445 for (function = HV_CPUID_IMPLEMENT_LIMITS + 1;
1446 function < HV_CPUID_NESTED_FEATURES; function++) {
1447 c = &cpuid_ent[cpuid_i++];
1448 c->function = function;
1449 }
1450
1451 c = &cpuid_ent[cpuid_i++];
1452 c->function = HV_CPUID_NESTED_FEATURES;
1453 c->eax = cpu->hyperv_nested[0];
1454 }
1455
1456 return cpuid_i;
1457}
1458
1459static Error *hv_passthrough_mig_blocker;
1460static Error *hv_no_nonarch_cs_mig_blocker;
1461
1462
/*
 * Check that a requested eVMCS version range lies inside a supported one.
 *
 * Both arguments pack a range into 16 bits: the low byte is the minimum
 * version and the high byte is the maximum version.
 *
 * Returns true when requested min >= supported min and requested max <=
 * supported max.
 */
static bool evmcs_version_supported(uint16_t evmcs_version,
                                    uint16_t supported_evmcs_version)
{
    const uint8_t want_lo = evmcs_version & 0xff;
    const uint8_t want_hi = evmcs_version >> 8;
    const uint8_t have_lo = supported_evmcs_version & 0xff;
    const uint8_t have_hi = supported_evmcs_version >> 8;

    if (want_lo < have_lo) {
        return false;
    }

    return want_hi <= have_hi;
}
1474
1475#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1)
1476
1477static int hyperv_init_vcpu(X86CPU *cpu)
1478{
1479 CPUState *cs = CPU(cpu);
1480 Error *local_err = NULL;
1481 int ret;
1482
1483 if (cpu->hyperv_passthrough && hv_passthrough_mig_blocker == NULL) {
1484 error_setg(&hv_passthrough_mig_blocker,
1485 "'hv-passthrough' CPU flag prevents migration, use explicit"
1486 " set of hv-* flags instead");
1487 ret = migrate_add_blocker(hv_passthrough_mig_blocker, &local_err);
1488 if (ret < 0) {
1489 error_report_err(local_err);
1490 return ret;
1491 }
1492 }
1493
1494 if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO &&
1495 hv_no_nonarch_cs_mig_blocker == NULL) {
1496 error_setg(&hv_no_nonarch_cs_mig_blocker,
1497 "'hv-no-nonarch-coresharing=auto' CPU flag prevents migration"
1498 " use explicit 'hv-no-nonarch-coresharing=on' instead (but"
1499 " make sure SMT is disabled and/or that vCPUs are properly"
1500 " pinned)");
1501 ret = migrate_add_blocker(hv_no_nonarch_cs_mig_blocker, &local_err);
1502 if (ret < 0) {
1503 error_report_err(local_err);
1504 return ret;
1505 }
1506 }
1507
1508 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && !hv_vpindex_settable) {
1509
1510
1511
1512
1513 struct {
1514 struct kvm_msrs info;
1515 struct kvm_msr_entry entries[1];
1516 } msr_data = {
1517 .info.nmsrs = 1,
1518 .entries[0].index = HV_X64_MSR_VP_INDEX,
1519 };
1520
1521 ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data);
1522 if (ret < 0) {
1523 return ret;
1524 }
1525 assert(ret == 1);
1526
1527 if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) {
1528 error_report("kernel's vp_index != QEMU's vp_index");
1529 return -ENXIO;
1530 }
1531 }
1532
1533 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
1534 uint32_t synic_cap = cpu->hyperv_synic_kvm_only ?
1535 KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2;
1536 ret = kvm_vcpu_enable_cap(cs, synic_cap, 0);
1537 if (ret < 0) {
1538 error_report("failed to turn on HyperV SynIC in KVM: %s",
1539 strerror(-ret));
1540 return ret;
1541 }
1542
1543 if (!cpu->hyperv_synic_kvm_only) {
1544 ret = hyperv_x86_synic_add(cpu);
1545 if (ret < 0) {
1546 error_report("failed to create HyperV SynIC: %s",
1547 strerror(-ret));
1548 return ret;
1549 }
1550 }
1551 }
1552
1553 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) {
1554 uint16_t evmcs_version = DEFAULT_EVMCS_VERSION;
1555 uint16_t supported_evmcs_version;
1556
1557 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
1558 (uintptr_t)&supported_evmcs_version);
1559
1560
1561
1562
1563
1564
1565
1566
1567 if (ret < 0) {
1568 error_report("Hyper-V %s is not supported by kernel",
1569 kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc);
1570 return ret;
1571 }
1572
1573 if (!evmcs_version_supported(evmcs_version, supported_evmcs_version)) {
1574 error_report("eVMCS version range [%d..%d] is not supported by "
1575 "kernel (supported: [%d..%d])", evmcs_version & 0xff,
1576 evmcs_version >> 8, supported_evmcs_version & 0xff,
1577 supported_evmcs_version >> 8);
1578 return -ENOTSUP;
1579 }
1580
1581 cpu->hyperv_nested[0] = evmcs_version;
1582 }
1583
1584 if (cpu->hyperv_enforce_cpuid) {
1585 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENFORCE_CPUID, 0, 1);
1586 if (ret < 0) {
1587 error_report("failed to enable KVM_CAP_HYPERV_ENFORCE_CPUID: %s",
1588 strerror(-ret));
1589 return ret;
1590 }
1591 }
1592
1593 return 0;
1594}
1595
1596static Error *invtsc_mig_blocker;
1597
1598#define KVM_MAX_CPUID_ENTRIES 100
1599
1600static void kvm_init_xsave(CPUX86State *env)
1601{
1602 if (has_xsave2) {
1603 env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
1604 } else if (has_xsave) {
1605 env->xsave_buf_len = sizeof(struct kvm_xsave);
1606 } else {
1607 return;
1608 }
1609
1610 env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
1611 memset(env->xsave_buf, 0, env->xsave_buf_len);
1612
1613
1614
1615
1616 assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
1617 env->xsave_buf_len);
1618}
1619
1620int kvm_arch_init_vcpu(CPUState *cs)
1621{
1622 struct {
1623 struct kvm_cpuid2 cpuid;
1624 struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
1625 } cpuid_data;
1626
1627
1628
1629
1630 QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
1631 sizeof(struct kvm_cpuid2) +
1632 sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
1633
1634 X86CPU *cpu = X86_CPU(cs);
1635 CPUX86State *env = &cpu->env;
1636 uint32_t limit, i, j, cpuid_i;
1637 uint32_t unused;
1638 struct kvm_cpuid_entry2 *c;
1639 uint32_t signature[3];
1640 int kvm_base = KVM_CPUID_SIGNATURE;
1641 int max_nested_state_len;
1642 int r;
1643 Error *local_err = NULL;
1644
1645 memset(&cpuid_data, 0, sizeof(cpuid_data));
1646
1647 cpuid_i = 0;
1648
1649 has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
1650
1651 r = kvm_arch_set_tsc_khz(cs);
1652 if (r < 0) {
1653 return r;
1654 }
1655
1656
1657
1658
1659
1660
1661 if (!env->tsc_khz) {
1662 r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
1663 kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
1664 -ENOTSUP;
1665 if (r > 0) {
1666 env->tsc_khz = r;
1667 }
1668 }
1669
1670 env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680 if (!kvm_hyperv_expand_features(cpu, &local_err)) {
1681 error_report_err(local_err);
1682 return -ENOSYS;
1683 }
1684
1685 if (hyperv_enabled(cpu)) {
1686 r = hyperv_init_vcpu(cpu);
1687 if (r) {
1688 return r;
1689 }
1690
1691 cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
1692 kvm_base = KVM_CPUID_SIGNATURE_NEXT;
1693 has_msr_hv_hypercall = true;
1694 }
1695
1696 if (cpu->expose_kvm) {
1697 memcpy(signature, "KVMKVMKVM\0\0\0", 12);
1698 c = &cpuid_data.entries[cpuid_i++];
1699 c->function = KVM_CPUID_SIGNATURE | kvm_base;
1700 c->eax = KVM_CPUID_FEATURES | kvm_base;
1701 c->ebx = signature[0];
1702 c->ecx = signature[1];
1703 c->edx = signature[2];
1704
1705 c = &cpuid_data.entries[cpuid_i++];
1706 c->function = KVM_CPUID_FEATURES | kvm_base;
1707 c->eax = env->features[FEAT_KVM];
1708 c->edx = env->features[FEAT_KVM_HINTS];
1709 }
1710
1711 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
1712
1713 if (cpu->kvm_pv_enforce_cpuid) {
1714 r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
1715 if (r < 0) {
1716 fprintf(stderr,
1717 "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
1718 strerror(-r));
1719 abort();
1720 }
1721 }
1722
1723 for (i = 0; i <= limit; i++) {
1724 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1725 fprintf(stderr, "unsupported level value: 0x%x\n", limit);
1726 abort();
1727 }
1728 c = &cpuid_data.entries[cpuid_i++];
1729
1730 switch (i) {
1731 case 2: {
1732
1733 int times;
1734
1735 c->function = i;
1736 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
1737 KVM_CPUID_FLAG_STATE_READ_NEXT;
1738 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
1739 times = c->eax & 0xff;
1740
1741 for (j = 1; j < times; ++j) {
1742 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1743 fprintf(stderr, "cpuid_data is full, no space for "
1744 "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
1745 abort();
1746 }
1747 c = &cpuid_data.entries[cpuid_i++];
1748 c->function = i;
1749 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
1750 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
1751 }
1752 break;
1753 }
1754 case 0x1f:
1755 if (env->nr_dies < 2) {
1756 break;
1757 }
1758
1759 case 4:
1760 case 0xb:
1761 case 0xd:
1762 for (j = 0; ; j++) {
1763 if (i == 0xd && j == 64) {
1764 break;
1765 }
1766
1767 if (i == 0x1f && j == 64) {
1768 break;
1769 }
1770
1771 c->function = i;
1772 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1773 c->index = j;
1774 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
1775
1776 if (i == 4 && c->eax == 0) {
1777 break;
1778 }
1779 if (i == 0xb && !(c->ecx & 0xff00)) {
1780 break;
1781 }
1782 if (i == 0x1f && !(c->ecx & 0xff00)) {
1783 break;
1784 }
1785 if (i == 0xd && c->eax == 0) {
1786 continue;
1787 }
1788 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1789 fprintf(stderr, "cpuid_data is full, no space for "
1790 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
1791 abort();
1792 }
1793 c = &cpuid_data.entries[cpuid_i++];
1794 }
1795 break;
1796 case 0x7:
1797 case 0x12:
1798 for (j = 0; ; j++) {
1799 c->function = i;
1800 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1801 c->index = j;
1802 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
1803
1804 if (j > 1 && (c->eax & 0xf) != 1) {
1805 break;
1806 }
1807
1808 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1809 fprintf(stderr, "cpuid_data is full, no space for "
1810 "cpuid(eax:0x12,ecx:0x%x)\n", j);
1811 abort();
1812 }
1813 c = &cpuid_data.entries[cpuid_i++];
1814 }
1815 break;
1816 case 0x14:
1817 case 0x1d:
1818 case 0x1e: {
1819 uint32_t times;
1820
1821 c->function = i;
1822 c->index = 0;
1823 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1824 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
1825 times = c->eax;
1826
1827 for (j = 1; j <= times; ++j) {
1828 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1829 fprintf(stderr, "cpuid_data is full, no space for "
1830 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
1831 abort();
1832 }
1833 c = &cpuid_data.entries[cpuid_i++];
1834 c->function = i;
1835 c->index = j;
1836 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1837 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
1838 }
1839 break;
1840 }
1841 default:
1842 c->function = i;
1843 c->flags = 0;
1844 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
1845 if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
1846
1847
1848
1849
1850 cpuid_i--;
1851 }
1852 break;
1853 }
1854 }
1855
1856 if (limit >= 0x0a) {
1857 uint32_t eax, edx;
1858
1859 cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx);
1860
1861 has_architectural_pmu_version = eax & 0xff;
1862 if (has_architectural_pmu_version > 0) {
1863 num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8;
1864
1865
1866
1867
1868
1869 if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) {
1870 num_architectural_pmu_gp_counters = MAX_GP_COUNTERS;
1871 }
1872
1873 if (has_architectural_pmu_version > 1) {
1874 num_architectural_pmu_fixed_counters = edx & 0x1f;
1875
1876 if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) {
1877 num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS;
1878 }
1879 }
1880 }
1881 }
1882
1883 cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
1884
1885 for (i = 0x80000000; i <= limit; i++) {
1886 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1887 fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
1888 abort();
1889 }
1890 c = &cpuid_data.entries[cpuid_i++];
1891
1892 switch (i) {
1893 case 0x8000001d:
1894
1895 for (j = 0; ; j++) {
1896 c->function = i;
1897 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
1898 c->index = j;
1899 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
1900
1901 if (c->eax == 0) {
1902 break;
1903 }
1904 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1905 fprintf(stderr, "cpuid_data is full, no space for "
1906 "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
1907 abort();
1908 }
1909 c = &cpuid_data.entries[cpuid_i++];
1910 }
1911 break;
1912 default:
1913 c->function = i;
1914 c->flags = 0;
1915 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
1916 if (!c->eax && !c->ebx && !c->ecx && !c->edx) {
1917
1918
1919
1920
1921 cpuid_i--;
1922 }
1923 break;
1924 }
1925 }
1926
1927
1928 if (env->cpuid_xlevel2 > 0) {
1929 cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
1930
1931 for (i = 0xC0000000; i <= limit; i++) {
1932 if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
1933 fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
1934 abort();
1935 }
1936 c = &cpuid_data.entries[cpuid_i++];
1937
1938 c->function = i;
1939 c->flags = 0;
1940 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
1941 }
1942 }
1943
1944 cpuid_data.cpuid.nent = cpuid_i;
1945
1946 if (((env->cpuid_version >> 8)&0xF) >= 6
1947 && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) ==
1948 (CPUID_MCE | CPUID_MCA)
1949 && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) {
1950 uint64_t mcg_cap, unsupported_caps;
1951 int banks;
1952 int ret;
1953
1954 ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks);
1955 if (ret < 0) {
1956 fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret));
1957 return ret;
1958 }
1959
1960 if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) {
1961 error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)",
1962 (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks);
1963 return -ENOTSUP;
1964 }
1965
1966 unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK);
1967 if (unsupported_caps) {
1968 if (unsupported_caps & MCG_LMCE_P) {
1969 error_report("kvm: LMCE not supported");
1970 return -ENOTSUP;
1971 }
1972 warn_report("Unsupported MCG_CAP bits: 0x%" PRIx64,
1973 unsupported_caps);
1974 }
1975
1976 env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK;
1977 ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap);
1978 if (ret < 0) {
1979 fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret));
1980 return ret;
1981 }
1982 }
1983
1984 cpu->vmsentry = qemu_add_vm_change_state_handler(cpu_update_state, env);
1985
1986 c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
1987 if (c) {
1988 has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
1989 !!(c->ecx & CPUID_EXT_SMX);
1990 }
1991
1992 c = cpuid_find_entry(&cpuid_data.cpuid, 7, 0);
1993 if (c && (c->ebx & CPUID_7_0_EBX_SGX)) {
1994 has_msr_feature_control = true;
1995 }
1996
1997 if (env->mcg_cap & MCG_LMCE_P) {
1998 has_msr_mcg_ext_ctl = has_msr_feature_control = true;
1999 }
2000
2001 if (!env->user_tsc_khz) {
2002 if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) &&
2003 invtsc_mig_blocker == NULL) {
2004 error_setg(&invtsc_mig_blocker,
2005 "State blocked by non-migratable CPU device"
2006 " (invtsc flag)");
2007 r = migrate_add_blocker(invtsc_mig_blocker, &local_err);
2008 if (r < 0) {
2009 error_report_err(local_err);
2010 return r;
2011 }
2012 }
2013 }
2014
2015 if (cpu->vmware_cpuid_freq
2016
2017
2018 && cpu->expose_kvm
2019 && kvm_base == KVM_CPUID_SIGNATURE
2020
2021 && tsc_is_stable_and_known(env)) {
2022
2023 c = &cpuid_data.entries[cpuid_i++];
2024 c->function = KVM_CPUID_SIGNATURE | 0x10;
2025 c->eax = env->tsc_khz;
2026 c->ebx = env->apic_bus_freq / 1000;
2027 c->ecx = c->edx = 0;
2028
2029 c = cpuid_find_entry(&cpuid_data.cpuid, kvm_base, 0);
2030 c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10);
2031 }
2032
2033 cpuid_data.cpuid.nent = cpuid_i;
2034
2035 cpuid_data.cpuid.padding = 0;
2036 r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data);
2037 if (r) {
2038 goto fail;
2039 }
2040 kvm_init_xsave(env);
2041
2042 max_nested_state_len = kvm_max_nested_state_length();
2043 if (max_nested_state_len > 0) {
2044 assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
2045
2046 if (cpu_has_vmx(env) || cpu_has_svm(env)) {
2047 struct kvm_vmx_nested_state_hdr *vmx_hdr;
2048
2049 env->nested_state = g_malloc0(max_nested_state_len);
2050 env->nested_state->size = max_nested_state_len;
2051
2052 if (cpu_has_vmx(env)) {
2053 env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
2054 vmx_hdr = &env->nested_state->hdr.vmx;
2055 vmx_hdr->vmxon_pa = -1ull;
2056 vmx_hdr->vmcs12_pa = -1ull;
2057 } else {
2058 env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
2059 }
2060 }
2061 }
2062
2063 cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);
2064
2065 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
2066 has_msr_tsc_aux = false;
2067 }
2068
2069 kvm_init_msrs(cpu);
2070
2071 return 0;
2072
2073 fail:
2074 migrate_del_blocker(invtsc_mig_blocker);
2075
2076 return r;
2077}
2078
2079int kvm_arch_destroy_vcpu(CPUState *cs)
2080{
2081 X86CPU *cpu = X86_CPU(cs);
2082 CPUX86State *env = &cpu->env;
2083
2084 g_free(env->xsave_buf);
2085
2086 if (cpu->kvm_msr_buf) {
2087 g_free(cpu->kvm_msr_buf);
2088 cpu->kvm_msr_buf = NULL;
2089 }
2090
2091 if (env->nested_state) {
2092 g_free(env->nested_state);
2093 env->nested_state = NULL;
2094 }
2095
2096 qemu_del_vm_change_state_handler(cpu->vmsentry);
2097
2098 return 0;
2099}
2100
2101void kvm_arch_reset_vcpu(X86CPU *cpu)
2102{
2103 CPUX86State *env = &cpu->env;
2104
2105 env->xcr0 = 1;
2106 if (kvm_irqchip_in_kernel()) {
2107 env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE :
2108 KVM_MP_STATE_UNINITIALIZED;
2109 } else {
2110 env->mp_state = KVM_MP_STATE_RUNNABLE;
2111 }
2112
2113 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
2114 int i;
2115 for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) {
2116 env->msr_hv_synic_sint[i] = HV_SINT_MASKED;
2117 }
2118
2119 hyperv_x86_synic_reset(cpu);
2120 }
2121
2122 env->poll_control_msr = 1;
2123
2124 sev_es_set_reset_vector(CPU(cpu));
2125}
2126
2127void kvm_arch_do_init_vcpu(X86CPU *cpu)
2128{
2129 CPUX86State *env = &cpu->env;
2130
2131
2132 if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) {
2133 env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
2134 }
2135}
2136
2137static int kvm_get_supported_feature_msrs(KVMState *s)
2138{
2139 int ret = 0;
2140
2141 if (kvm_feature_msrs != NULL) {
2142 return 0;
2143 }
2144
2145 if (!kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES)) {
2146 return 0;
2147 }
2148
2149 struct kvm_msr_list msr_list;
2150
2151 msr_list.nmsrs = 0;
2152 ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list);
2153 if (ret < 0 && ret != -E2BIG) {
2154 error_report("Fetch KVM feature MSR list failed: %s",
2155 strerror(-ret));
2156 return ret;
2157 }
2158
2159 assert(msr_list.nmsrs > 0);
2160 kvm_feature_msrs = (struct kvm_msr_list *) \
2161 g_malloc0(sizeof(msr_list) +
2162 msr_list.nmsrs * sizeof(msr_list.indices[0]));
2163
2164 kvm_feature_msrs->nmsrs = msr_list.nmsrs;
2165 ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs);
2166
2167 if (ret < 0) {
2168 error_report("Fetch KVM feature MSR list failed: %s",
2169 strerror(-ret));
2170 g_free(kvm_feature_msrs);
2171 kvm_feature_msrs = NULL;
2172 return ret;
2173 }
2174
2175 return 0;
2176}
2177
2178static int kvm_get_supported_msrs(KVMState *s)
2179{
2180 int ret = 0;
2181 struct kvm_msr_list msr_list, *kvm_msr_list;
2182
2183
2184
2185
2186
2187 msr_list.nmsrs = 0;
2188 ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list);
2189 if (ret < 0 && ret != -E2BIG) {
2190 return ret;
2191 }
2192
2193
2194
2195
2196 kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) +
2197 msr_list.nmsrs *
2198 sizeof(msr_list.indices[0])));
2199
2200 kvm_msr_list->nmsrs = msr_list.nmsrs;
2201 ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
2202 if (ret >= 0) {
2203 int i;
2204
2205 for (i = 0; i < kvm_msr_list->nmsrs; i++) {
2206 switch (kvm_msr_list->indices[i]) {
2207 case MSR_STAR:
2208 has_msr_star = true;
2209 break;
2210 case MSR_VM_HSAVE_PA:
2211 has_msr_hsave_pa = true;
2212 break;
2213 case MSR_TSC_AUX:
2214 has_msr_tsc_aux = true;
2215 break;
2216 case MSR_TSC_ADJUST:
2217 has_msr_tsc_adjust = true;
2218 break;
2219 case MSR_IA32_TSCDEADLINE:
2220 has_msr_tsc_deadline = true;
2221 break;
2222 case MSR_IA32_SMBASE:
2223 has_msr_smbase = true;
2224 break;
2225 case MSR_SMI_COUNT:
2226 has_msr_smi_count = true;
2227 break;
2228 case MSR_IA32_MISC_ENABLE:
2229 has_msr_misc_enable = true;
2230 break;
2231 case MSR_IA32_BNDCFGS:
2232 has_msr_bndcfgs = true;
2233 break;
2234 case MSR_IA32_XSS:
2235 has_msr_xss = true;
2236 break;
2237 case MSR_IA32_UMWAIT_CONTROL:
2238 has_msr_umwait = true;
2239 break;
2240 case HV_X64_MSR_CRASH_CTL:
2241 has_msr_hv_crash = true;
2242 break;
2243 case HV_X64_MSR_RESET:
2244 has_msr_hv_reset = true;
2245 break;
2246 case HV_X64_MSR_VP_INDEX:
2247 has_msr_hv_vpindex = true;
2248 break;
2249 case HV_X64_MSR_VP_RUNTIME:
2250 has_msr_hv_runtime = true;
2251 break;
2252 case HV_X64_MSR_SCONTROL:
2253 has_msr_hv_synic = true;
2254 break;
2255 case HV_X64_MSR_STIMER0_CONFIG:
2256 has_msr_hv_stimer = true;
2257 break;
2258 case HV_X64_MSR_TSC_FREQUENCY:
2259 has_msr_hv_frequencies = true;
2260 break;
2261 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2262 has_msr_hv_reenlightenment = true;
2263 break;
2264 case MSR_IA32_SPEC_CTRL:
2265 has_msr_spec_ctrl = true;
2266 break;
2267 case MSR_AMD64_TSC_RATIO:
2268 has_tsc_scale_msr = true;
2269 break;
2270 case MSR_IA32_TSX_CTRL:
2271 has_msr_tsx_ctrl = true;
2272 break;
2273 case MSR_VIRT_SSBD:
2274 has_msr_virt_ssbd = true;
2275 break;
2276 case MSR_IA32_ARCH_CAPABILITIES:
2277 has_msr_arch_capabs = true;
2278 break;
2279 case MSR_IA32_CORE_CAPABILITY:
2280 has_msr_core_capabs = true;
2281 break;
2282 case MSR_IA32_PERF_CAPABILITIES:
2283 has_msr_perf_capabs = true;
2284 break;
2285 case MSR_IA32_VMX_VMFUNC:
2286 has_msr_vmx_vmfunc = true;
2287 break;
2288 case MSR_IA32_UCODE_REV:
2289 has_msr_ucode_rev = true;
2290 break;
2291 case MSR_IA32_VMX_PROCBASED_CTLS2:
2292 has_msr_vmx_procbased_ctls2 = true;
2293 break;
2294 case MSR_IA32_PKRS:
2295 has_msr_pkrs = true;
2296 break;
2297 }
2298 }
2299 }
2300
2301 g_free(kvm_msr_list);
2302
2303 return ret;
2304}
2305
2306static Notifier smram_machine_done;
2307static KVMMemoryListener smram_listener;
2308static AddressSpace smram_address_space;
2309static MemoryRegion smram_as_root;
2310static MemoryRegion smram_as_mem;
2311
2312static void register_smram_listener(Notifier *n, void *unused)
2313{
2314 MemoryRegion *smram =
2315 (MemoryRegion *) object_resolve_path("/machine/smram", NULL);
2316
2317
2318 memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull);
2319 memory_region_set_enabled(&smram_as_root, true);
2320
2321
2322
2323
2324 memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram",
2325 get_system_memory(), 0, ~0ull);
2326 memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0);
2327 memory_region_set_enabled(&smram_as_mem, true);
2328
2329 if (smram) {
2330
2331 memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10);
2332 memory_region_set_enabled(smram, true);
2333 }
2334
2335 address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM");
2336 kvm_memory_listener_register(kvm_state, &smram_listener,
2337 &smram_address_space, 1, "kvm-smram");
2338}
2339
2340int kvm_arch_init(MachineState *ms, KVMState *s)
2341{
2342 uint64_t identity_base = 0xfffbc000;
2343 uint64_t shadow_mem;
2344 int ret;
2345 struct utsname utsname;
2346 Error *local_err = NULL;
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360 ret = sev_kvm_init(ms->cgs, &local_err);
2361 if (ret < 0) {
2362 error_report_err(local_err);
2363 return ret;
2364 }
2365
2366 if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
2367 error_report("kvm: KVM_CAP_IRQ_ROUTING not supported by KVM");
2368 return -ENOTSUP;
2369 }
2370
2371 has_xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
2372 has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
2373 has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
2374 has_sregs2 = kvm_check_extension(s, KVM_CAP_SREGS2) > 0;
2375
2376 hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);
2377
2378 has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD);
2379 if (has_exception_payload) {
2380 ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true);
2381 if (ret < 0) {
2382 error_report("kvm: Failed to enable exception payload cap: %s",
2383 strerror(-ret));
2384 return ret;
2385 }
2386 }
2387
2388 ret = kvm_get_supported_msrs(s);
2389 if (ret < 0) {
2390 return ret;
2391 }
2392
2393 kvm_get_supported_feature_msrs(s);
2394
2395 uname(&utsname);
2396 lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409 if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) {
2410
2411 identity_base = 0xfeffc000;
2412
2413 ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base);
2414 if (ret < 0) {
2415 return ret;
2416 }
2417 }
2418
2419
2420 ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000);
2421 if (ret < 0) {
2422 return ret;
2423 }
2424
2425
2426 ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED);
2427 if (ret < 0) {
2428 fprintf(stderr, "e820_add_entry() table is full\n");
2429 return ret;
2430 }
2431
2432 shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort);
2433 if (shadow_mem != -1) {
2434 shadow_mem /= 4096;
2435 ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem);
2436 if (ret < 0) {
2437 return ret;
2438 }
2439 }
2440
2441 if (kvm_check_extension(s, KVM_CAP_X86_SMM) &&
2442 object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) &&
2443 x86_machine_is_smm_enabled(X86_MACHINE(ms))) {
2444 smram_machine_done.notify = register_smram_listener;
2445 qemu_add_machine_init_done_notifier(&smram_machine_done);
2446 }
2447
2448 if (enable_cpu_pm) {
2449 int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);
2450 int ret;
2451
2452
2453#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT)
2454#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL
2455#endif
2456 if (disable_exits) {
2457 disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT |
2458 KVM_X86_DISABLE_EXITS_HLT |
2459 KVM_X86_DISABLE_EXITS_PAUSE |
2460 KVM_X86_DISABLE_EXITS_CSTATE);
2461 }
2462
2463 ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0,
2464 disable_exits);
2465 if (ret < 0) {
2466 error_report("kvm: guest stopping CPU not supported: %s",
2467 strerror(-ret));
2468 }
2469 }
2470
2471 if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) {
2472 X86MachineState *x86ms = X86_MACHINE(ms);
2473
2474 if (x86ms->bus_lock_ratelimit > 0) {
2475 ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT);
2476 if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) {
2477 error_report("kvm: bus lock detection unsupported");
2478 return -ENOTSUP;
2479 }
2480 ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0,
2481 KVM_BUS_LOCK_DETECTION_EXIT);
2482 if (ret < 0) {
2483 error_report("kvm: Failed to enable bus lock detection cap: %s",
2484 strerror(-ret));
2485 return ret;
2486 }
2487 ratelimit_init(&bus_lock_ratelimit_ctrl);
2488 ratelimit_set_speed(&bus_lock_ratelimit_ctrl,
2489 x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME);
2490 }
2491 }
2492
2493 return 0;
2494}
2495
2496static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
2497{
2498 lhs->selector = rhs->selector;
2499 lhs->base = rhs->base;
2500 lhs->limit = rhs->limit;
2501 lhs->type = 3;
2502 lhs->present = 1;
2503 lhs->dpl = 3;
2504 lhs->db = 0;
2505 lhs->s = 1;
2506 lhs->l = 0;
2507 lhs->g = 0;
2508 lhs->avl = 0;
2509 lhs->unusable = 0;
2510}
2511
2512static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
2513{
2514 unsigned flags = rhs->flags;
2515 lhs->selector = rhs->selector;
2516 lhs->base = rhs->base;
2517 lhs->limit = rhs->limit;
2518 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
2519 lhs->present = (flags & DESC_P_MASK) != 0;
2520 lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3;
2521 lhs->db = (flags >> DESC_B_SHIFT) & 1;
2522 lhs->s = (flags & DESC_S_MASK) != 0;
2523 lhs->l = (flags >> DESC_L_SHIFT) & 1;
2524 lhs->g = (flags & DESC_G_MASK) != 0;
2525 lhs->avl = (flags & DESC_AVL_MASK) != 0;
2526 lhs->unusable = !lhs->present;
2527 lhs->padding = 0;
2528}
2529
2530static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
2531{
2532 lhs->selector = rhs->selector;
2533 lhs->base = rhs->base;
2534 lhs->limit = rhs->limit;
2535 lhs->flags = (rhs->type << DESC_TYPE_SHIFT) |
2536 ((rhs->present && !rhs->unusable) * DESC_P_MASK) |
2537 (rhs->dpl << DESC_DPL_SHIFT) |
2538 (rhs->db << DESC_B_SHIFT) |
2539 (rhs->s * DESC_S_MASK) |
2540 (rhs->l << DESC_L_SHIFT) |
2541 (rhs->g * DESC_G_MASK) |
2542 (rhs->avl * DESC_AVL_MASK);
2543}
2544
2545static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
2546{
2547 if (set) {
2548 *kvm_reg = *qemu_reg;
2549 } else {
2550 *qemu_reg = *kvm_reg;
2551 }
2552}
2553
2554static int kvm_getput_regs(X86CPU *cpu, int set)
2555{
2556 CPUX86State *env = &cpu->env;
2557 struct kvm_regs regs;
2558 int ret = 0;
2559
2560 if (!set) {
2561 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, ®s);
2562 if (ret < 0) {
2563 return ret;
2564 }
2565 }
2566
2567 kvm_getput_reg(®s.rax, &env->regs[R_EAX], set);
2568 kvm_getput_reg(®s.rbx, &env->regs[R_EBX], set);
2569 kvm_getput_reg(®s.rcx, &env->regs[R_ECX], set);
2570 kvm_getput_reg(®s.rdx, &env->regs[R_EDX], set);
2571 kvm_getput_reg(®s.rsi, &env->regs[R_ESI], set);
2572 kvm_getput_reg(®s.rdi, &env->regs[R_EDI], set);
2573 kvm_getput_reg(®s.rsp, &env->regs[R_ESP], set);
2574 kvm_getput_reg(®s.rbp, &env->regs[R_EBP], set);
2575#ifdef TARGET_X86_64
2576 kvm_getput_reg(®s.r8, &env->regs[8], set);
2577 kvm_getput_reg(®s.r9, &env->regs[9], set);
2578 kvm_getput_reg(®s.r10, &env->regs[10], set);
2579 kvm_getput_reg(®s.r11, &env->regs[11], set);
2580 kvm_getput_reg(®s.r12, &env->regs[12], set);
2581 kvm_getput_reg(®s.r13, &env->regs[13], set);
2582 kvm_getput_reg(®s.r14, &env->regs[14], set);
2583 kvm_getput_reg(®s.r15, &env->regs[15], set);
2584#endif
2585
2586 kvm_getput_reg(®s.rflags, &env->eflags, set);
2587 kvm_getput_reg(®s.rip, &env->eip, set);
2588
2589 if (set) {
2590 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, ®s);
2591 }
2592
2593 return ret;
2594}
2595
2596static int kvm_put_fpu(X86CPU *cpu)
2597{
2598 CPUX86State *env = &cpu->env;
2599 struct kvm_fpu fpu;
2600 int i;
2601
2602 memset(&fpu, 0, sizeof fpu);
2603 fpu.fsw = env->fpus & ~(7 << 11);
2604 fpu.fsw |= (env->fpstt & 7) << 11;
2605 fpu.fcw = env->fpuc;
2606 fpu.last_opcode = env->fpop;
2607 fpu.last_ip = env->fpip;
2608 fpu.last_dp = env->fpdp;
2609 for (i = 0; i < 8; ++i) {
2610 fpu.ftwx |= (!env->fptags[i]) << i;
2611 }
2612 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
2613 for (i = 0; i < CPU_NB_REGS; i++) {
2614 stq_p(&fpu.xmm[i][0], env->xmm_regs[i].ZMM_Q(0));
2615 stq_p(&fpu.xmm[i][8], env->xmm_regs[i].ZMM_Q(1));
2616 }
2617 fpu.mxcsr = env->mxcsr;
2618
2619 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu);
2620}
2621
2622static int kvm_put_xsave(X86CPU *cpu)
2623{
2624 CPUX86State *env = &cpu->env;
2625 void *xsave = env->xsave_buf;
2626
2627 if (!has_xsave) {
2628 return kvm_put_fpu(cpu);
2629 }
2630 x86_cpu_xsave_all_areas(cpu, xsave, env->xsave_buf_len);
2631
2632 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
2633}
2634
2635static int kvm_put_xcrs(X86CPU *cpu)
2636{
2637 CPUX86State *env = &cpu->env;
2638 struct kvm_xcrs xcrs = {};
2639
2640 if (!has_xcrs) {
2641 return 0;
2642 }
2643
2644 xcrs.nr_xcrs = 1;
2645 xcrs.flags = 0;
2646 xcrs.xcrs[0].xcr = 0;
2647 xcrs.xcrs[0].value = env->xcr0;
2648 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs);
2649}
2650
2651static int kvm_put_sregs(X86CPU *cpu)
2652{
2653 CPUX86State *env = &cpu->env;
2654 struct kvm_sregs sregs;
2655
2656
2657
2658
2659
2660 memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap));
2661
2662 if ((env->eflags & VM_MASK)) {
2663 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
2664 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
2665 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
2666 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
2667 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
2668 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
2669 } else {
2670 set_seg(&sregs.cs, &env->segs[R_CS]);
2671 set_seg(&sregs.ds, &env->segs[R_DS]);
2672 set_seg(&sregs.es, &env->segs[R_ES]);
2673 set_seg(&sregs.fs, &env->segs[R_FS]);
2674 set_seg(&sregs.gs, &env->segs[R_GS]);
2675 set_seg(&sregs.ss, &env->segs[R_SS]);
2676 }
2677
2678 set_seg(&sregs.tr, &env->tr);
2679 set_seg(&sregs.ldt, &env->ldt);
2680
2681 sregs.idt.limit = env->idt.limit;
2682 sregs.idt.base = env->idt.base;
2683 memset(sregs.idt.padding, 0, sizeof sregs.idt.padding);
2684 sregs.gdt.limit = env->gdt.limit;
2685 sregs.gdt.base = env->gdt.base;
2686 memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding);
2687
2688 sregs.cr0 = env->cr[0];
2689 sregs.cr2 = env->cr[2];
2690 sregs.cr3 = env->cr[3];
2691 sregs.cr4 = env->cr[4];
2692
2693 sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
2694 sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
2695
2696 sregs.efer = env->efer;
2697
2698 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
2699}
2700
2701static int kvm_put_sregs2(X86CPU *cpu)
2702{
2703 CPUX86State *env = &cpu->env;
2704 struct kvm_sregs2 sregs;
2705 int i;
2706
2707 sregs.flags = 0;
2708
2709 if ((env->eflags & VM_MASK)) {
2710 set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
2711 set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
2712 set_v8086_seg(&sregs.es, &env->segs[R_ES]);
2713 set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
2714 set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
2715 set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
2716 } else {
2717 set_seg(&sregs.cs, &env->segs[R_CS]);
2718 set_seg(&sregs.ds, &env->segs[R_DS]);
2719 set_seg(&sregs.es, &env->segs[R_ES]);
2720 set_seg(&sregs.fs, &env->segs[R_FS]);
2721 set_seg(&sregs.gs, &env->segs[R_GS]);
2722 set_seg(&sregs.ss, &env->segs[R_SS]);
2723 }
2724
2725 set_seg(&sregs.tr, &env->tr);
2726 set_seg(&sregs.ldt, &env->ldt);
2727
2728 sregs.idt.limit = env->idt.limit;
2729 sregs.idt.base = env->idt.base;
2730 memset(sregs.idt.padding, 0, sizeof sregs.idt.padding);
2731 sregs.gdt.limit = env->gdt.limit;
2732 sregs.gdt.base = env->gdt.base;
2733 memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding);
2734
2735 sregs.cr0 = env->cr[0];
2736 sregs.cr2 = env->cr[2];
2737 sregs.cr3 = env->cr[3];
2738 sregs.cr4 = env->cr[4];
2739
2740 sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
2741 sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
2742
2743 sregs.efer = env->efer;
2744
2745 if (env->pdptrs_valid) {
2746 for (i = 0; i < 4; i++) {
2747 sregs.pdptrs[i] = env->pdptrs[i];
2748 }
2749 sregs.flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
2750 }
2751
2752 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS2, &sregs);
2753}
2754
2755
2756static void kvm_msr_buf_reset(X86CPU *cpu)
2757{
2758 memset(cpu->kvm_msr_buf, 0, MSR_BUF_SIZE);
2759}
2760
2761static void kvm_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value)
2762{
2763 struct kvm_msrs *msrs = cpu->kvm_msr_buf;
2764 void *limit = ((void *)msrs) + MSR_BUF_SIZE;
2765 struct kvm_msr_entry *entry = &msrs->entries[msrs->nmsrs];
2766
2767 assert((void *)(entry + 1) <= limit);
2768
2769 entry->index = index;
2770 entry->reserved = 0;
2771 entry->data = value;
2772 msrs->nmsrs++;
2773}
2774
2775static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value)
2776{
2777 kvm_msr_buf_reset(cpu);
2778 kvm_msr_entry_add(cpu, index, value);
2779
2780 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);
2781}
2782
2783void kvm_put_apicbase(X86CPU *cpu, uint64_t value)
2784{
2785 int ret;
2786
2787 ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value);
2788 assert(ret == 1);
2789}
2790
2791static int kvm_put_tscdeadline_msr(X86CPU *cpu)
2792{
2793 CPUX86State *env = &cpu->env;
2794 int ret;
2795
2796 if (!has_msr_tsc_deadline) {
2797 return 0;
2798 }
2799
2800 ret = kvm_put_one_msr(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline);
2801 if (ret < 0) {
2802 return ret;
2803 }
2804
2805 assert(ret == 1);
2806 return 0;
2807}
2808
2809
2810
2811
2812
2813
2814
2815static int kvm_put_msr_feature_control(X86CPU *cpu)
2816{
2817 int ret;
2818
2819 if (!has_msr_feature_control) {
2820 return 0;
2821 }
2822
2823 ret = kvm_put_one_msr(cpu, MSR_IA32_FEATURE_CONTROL,
2824 cpu->env.msr_ia32_feature_control);
2825 if (ret < 0) {
2826 return ret;
2827 }
2828
2829 assert(ret == 1);
2830 return 0;
2831}
2832
2833static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features)
2834{
2835 uint32_t default1, can_be_one, can_be_zero;
2836 uint32_t must_be_one;
2837
2838 switch (index) {
2839 case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
2840 default1 = 0x00000016;
2841 break;
2842 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2843 default1 = 0x0401e172;
2844 break;
2845 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2846 default1 = 0x000011ff;
2847 break;
2848 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2849 default1 = 0x00036dff;
2850 break;
2851 case MSR_IA32_VMX_PROCBASED_CTLS2:
2852 default1 = 0;
2853 break;
2854 default:
2855 abort();
2856 }
2857
2858
2859
2860
2861 can_be_one = features | default1;
2862 can_be_zero = features | ~default1;
2863 must_be_one = ~can_be_zero;
2864
2865
2866
2867
2868
2869 return must_be_one | (((uint64_t)can_be_one) << 32);
2870}
2871
2872static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f)
2873{
2874 uint64_t kvm_vmx_basic =
2875 kvm_arch_get_supported_msr_feature(kvm_state,
2876 MSR_IA32_VMX_BASIC);
2877
2878 if (!kvm_vmx_basic) {
2879
2880
2881
2882 return;
2883 }
2884
2885 uint64_t kvm_vmx_misc =
2886 kvm_arch_get_supported_msr_feature(kvm_state,
2887 MSR_IA32_VMX_MISC);
2888 uint64_t kvm_vmx_ept_vpid =
2889 kvm_arch_get_supported_msr_feature(kvm_state,
2890 MSR_IA32_VMX_EPT_VPID_CAP);
2891
2892
2893
2894
2895
2896 uint64_t fixed_vmx_exit = f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM
2897 ? (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32 : 0;
2898
2899
2900
2901
2902
2903 uint64_t fixed_vmx_basic = kvm_vmx_basic &
2904 (MSR_VMX_BASIC_VMCS_REVISION_MASK |
2905 MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK |
2906 MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK);
2907
2908
2909
2910
2911
2912
2913
2914
2915 uint64_t fixed_vmx_misc = kvm_vmx_misc &
2916 (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK |
2917 MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK);
2918
2919
2920
2921
2922
2923 uint64_t fixed_vmx_ept_mask =
2924 (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT ?
2925 MSR_VMX_EPT_UC | MSR_VMX_EPT_WB : 0);
2926 uint64_t fixed_vmx_ept_vpid = kvm_vmx_ept_vpid & fixed_vmx_ept_mask;
2927
2928 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
2929 make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
2930 f[FEAT_VMX_PROCBASED_CTLS]));
2931 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS,
2932 make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS,
2933 f[FEAT_VMX_PINBASED_CTLS]));
2934 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS,
2935 make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS,
2936 f[FEAT_VMX_EXIT_CTLS]) | fixed_vmx_exit);
2937 kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS,
2938 make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS,
2939 f[FEAT_VMX_ENTRY_CTLS]));
2940 kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2,
2941 make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2,
2942 f[FEAT_VMX_SECONDARY_CTLS]));
2943 kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP,
2944 f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid);
2945 kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC,
2946 f[FEAT_VMX_BASIC] | fixed_vmx_basic);
2947 kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC,
2948 f[FEAT_VMX_MISC] | fixed_vmx_misc);
2949 if (has_msr_vmx_vmfunc) {
2950 kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]);
2951 }
2952
2953
2954
2955
2956
2957 kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0,
2958 CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK);
2959 kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0,
2960 CR4_VMXE_MASK);
2961
2962 if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) {
2963
2964 kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x32);
2965 } else {
2966
2967 kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x2E);
2968 }
2969}
2970
2971static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f)
2972{
2973 uint64_t kvm_perf_cap =
2974 kvm_arch_get_supported_msr_feature(kvm_state,
2975 MSR_IA32_PERF_CAPABILITIES);
2976
2977 if (kvm_perf_cap) {
2978 kvm_msr_entry_add(cpu, MSR_IA32_PERF_CAPABILITIES,
2979 kvm_perf_cap & f[FEAT_PERF_CAPABILITIES]);
2980 }
2981}
2982
2983static int kvm_buf_set_msrs(X86CPU *cpu)
2984{
2985 int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);
2986 if (ret < 0) {
2987 return ret;
2988 }
2989
2990 if (ret < cpu->kvm_msr_buf->nmsrs) {
2991 struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
2992 error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
2993 (uint32_t)e->index, (uint64_t)e->data);
2994 }
2995
2996 assert(ret == cpu->kvm_msr_buf->nmsrs);
2997 return 0;
2998}
2999
3000static void kvm_init_msrs(X86CPU *cpu)
3001{
3002 CPUX86State *env = &cpu->env;
3003
3004 kvm_msr_buf_reset(cpu);
3005 if (has_msr_arch_capabs) {
3006 kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
3007 env->features[FEAT_ARCH_CAPABILITIES]);
3008 }
3009
3010 if (has_msr_core_capabs) {
3011 kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
3012 env->features[FEAT_CORE_CAPABILITY]);
3013 }
3014
3015 if (has_msr_perf_capabs && cpu->enable_pmu) {
3016 kvm_msr_entry_add_perf(cpu, env->features);
3017 }
3018
3019 if (has_msr_ucode_rev) {
3020 kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
3021 }
3022
3023
3024
3025
3026
3027 if (kvm_feature_msrs && cpu_has_vmx(env)) {
3028 kvm_msr_entry_add_vmx(cpu, env->features);
3029 }
3030
3031 assert(kvm_buf_set_msrs(cpu) == 0);
3032}
3033
3034static int kvm_put_msrs(X86CPU *cpu, int level)
3035{
3036 CPUX86State *env = &cpu->env;
3037 int i;
3038
3039 kvm_msr_buf_reset(cpu);
3040
3041 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
3042 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
3043 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
3044 kvm_msr_entry_add(cpu, MSR_PAT, env->pat);
3045 if (has_msr_star) {
3046 kvm_msr_entry_add(cpu, MSR_STAR, env->star);
3047 }
3048 if (has_msr_hsave_pa) {
3049 kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, env->vm_hsave);
3050 }
3051 if (has_msr_tsc_aux) {
3052 kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux);
3053 }
3054 if (has_msr_tsc_adjust) {
3055 kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust);
3056 }
3057 if (has_msr_misc_enable) {
3058 kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE,
3059 env->msr_ia32_misc_enable);
3060 }
3061 if (has_msr_smbase) {
3062 kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, env->smbase);
3063 }
3064 if (has_msr_smi_count) {
3065 kvm_msr_entry_add(cpu, MSR_SMI_COUNT, env->msr_smi_count);
3066 }
3067 if (has_msr_pkrs) {
3068 kvm_msr_entry_add(cpu, MSR_IA32_PKRS, env->pkrs);
3069 }
3070 if (has_msr_bndcfgs) {
3071 kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, env->msr_bndcfgs);
3072 }
3073 if (has_msr_xss) {
3074 kvm_msr_entry_add(cpu, MSR_IA32_XSS, env->xss);
3075 }
3076 if (has_msr_umwait) {
3077 kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, env->umwait);
3078 }
3079 if (has_msr_spec_ctrl) {
3080 kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl);
3081 }
3082 if (has_tsc_scale_msr) {
3083 kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, env->amd_tsc_scale_msr);
3084 }
3085
3086 if (has_msr_tsx_ctrl) {
3087 kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, env->tsx_ctrl);
3088 }
3089 if (has_msr_virt_ssbd) {
3090 kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd);
3091 }
3092
3093#ifdef TARGET_X86_64
3094 if (lm_capable_kernel) {
3095 kvm_msr_entry_add(cpu, MSR_CSTAR, env->cstar);
3096 kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase);
3097 kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask);
3098 kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar);
3099 }
3100#endif
3101
3102
3103
3104
3105
3106 if (level >= KVM_PUT_RESET_STATE) {
3107 kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc);
3108 kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr);
3109 kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
3110 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) {
3111 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, env->async_pf_int_msr);
3112 }
3113 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) {
3114 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr);
3115 }
3116 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) {
3117 kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, env->pv_eoi_en_msr);
3118 }
3119 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) {
3120 kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr);
3121 }
3122
3123 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) {
3124 kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, env->poll_control_msr);
3125 }
3126
3127 if (has_architectural_pmu_version > 0) {
3128 if (has_architectural_pmu_version > 1) {
3129
3130 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
3131 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
3132 }
3133
3134
3135 for (i = 0; i < num_architectural_pmu_fixed_counters; i++) {
3136 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i,
3137 env->msr_fixed_counters[i]);
3138 }
3139 for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
3140 kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i,
3141 env->msr_gp_counters[i]);
3142 kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i,
3143 env->msr_gp_evtsel[i]);
3144 }
3145 if (has_architectural_pmu_version > 1) {
3146 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS,
3147 env->msr_global_status);
3148 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
3149 env->msr_global_ovf_ctrl);
3150
3151
3152 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL,
3153 env->msr_fixed_ctr_ctrl);
3154 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL,
3155 env->msr_global_ctrl);
3156 }
3157 }
3158
3159
3160
3161
3162 if (current_cpu == first_cpu) {
3163 if (has_msr_hv_hypercall) {
3164 kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID,
3165 env->msr_hv_guest_os_id);
3166 kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL,
3167 env->msr_hv_hypercall);
3168 }
3169 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) {
3170 kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC,
3171 env->msr_hv_tsc);
3172 }
3173 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) {
3174 kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL,
3175 env->msr_hv_reenlightenment_control);
3176 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL,
3177 env->msr_hv_tsc_emulation_control);
3178 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS,
3179 env->msr_hv_tsc_emulation_status);
3180 }
3181 }
3182 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) {
3183 kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE,
3184 env->msr_hv_vapic);
3185 }
3186 if (has_msr_hv_crash) {
3187 int j;
3188
3189 for (j = 0; j < HV_CRASH_PARAMS; j++)
3190 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j,
3191 env->msr_hv_crash_params[j]);
3192
3193 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_NOTIFY);
3194 }
3195 if (has_msr_hv_runtime) {
3196 kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime);
3197 }
3198 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)
3199 && hv_vpindex_settable) {
3200 kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX,
3201 hyperv_vp_index(CPU(cpu)));
3202 }
3203 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
3204 int j;
3205
3206 kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, HV_SYNIC_VERSION);
3207
3208 kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL,
3209 env->msr_hv_synic_control);
3210 kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP,
3211 env->msr_hv_synic_evt_page);
3212 kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP,
3213 env->msr_hv_synic_msg_page);
3214
3215 for (j = 0; j < ARRAY_SIZE(env->msr_hv_synic_sint); j++) {
3216 kvm_msr_entry_add(cpu, HV_X64_MSR_SINT0 + j,
3217 env->msr_hv_synic_sint[j]);
3218 }
3219 }
3220 if (has_msr_hv_stimer) {
3221 int j;
3222
3223 for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_config); j++) {
3224 kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_CONFIG + j * 2,
3225 env->msr_hv_stimer_config[j]);
3226 }
3227
3228 for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_count); j++) {
3229 kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_COUNT + j * 2,
3230 env->msr_hv_stimer_count[j]);
3231 }
3232 }
3233 if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
3234 uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits);
3235
3236 kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype);
3237 kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
3238 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
3239 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
3240 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
3241 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
3242 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
3243 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
3244 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
3245 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
3246 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
3247 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
3248 for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
3249
3250
3251
3252 uint64_t mask = env->mtrr_var[i].mask;
3253 mask &= phys_mask;
3254
3255 kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i),
3256 env->mtrr_var[i].base);
3257 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask);
3258 }
3259 }
3260 if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
3261 int addr_num = kvm_arch_get_supported_cpuid(kvm_state,
3262 0x14, 1, R_EAX) & 0x7;
3263
3264 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL,
3265 env->msr_rtit_ctrl);
3266 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS,
3267 env->msr_rtit_status);
3268 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE,
3269 env->msr_rtit_output_base);
3270 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK,
3271 env->msr_rtit_output_mask);
3272 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH,
3273 env->msr_rtit_cr3_match);
3274 for (i = 0; i < addr_num; i++) {
3275 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i,
3276 env->msr_rtit_addrs[i]);
3277 }
3278 }
3279
3280 if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) {
3281 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0,
3282 env->msr_ia32_sgxlepubkeyhash[0]);
3283 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1,
3284 env->msr_ia32_sgxlepubkeyhash[1]);
3285 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2,
3286 env->msr_ia32_sgxlepubkeyhash[2]);
3287 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3,
3288 env->msr_ia32_sgxlepubkeyhash[3]);
3289 }
3290
3291 if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
3292 kvm_msr_entry_add(cpu, MSR_IA32_XFD,
3293 env->msr_xfd);
3294 kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
3295 env->msr_xfd_err);
3296 }
3297
3298
3299
3300 }
3301
3302 if (env->mcg_cap) {
3303 int i;
3304
3305 kvm_msr_entry_add(cpu, MSR_MCG_STATUS, env->mcg_status);
3306 kvm_msr_entry_add(cpu, MSR_MCG_CTL, env->mcg_ctl);
3307 if (has_msr_mcg_ext_ctl) {
3308 kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, env->mcg_ext_ctl);
3309 }
3310 for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
3311 kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, env->mce_banks[i]);
3312 }
3313 }
3314
3315 return kvm_buf_set_msrs(cpu);
3316}
3317
3318
3319static int kvm_get_fpu(X86CPU *cpu)
3320{
3321 CPUX86State *env = &cpu->env;
3322 struct kvm_fpu fpu;
3323 int i, ret;
3324
3325 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_FPU, &fpu);
3326 if (ret < 0) {
3327 return ret;
3328 }
3329
3330 env->fpstt = (fpu.fsw >> 11) & 7;
3331 env->fpus = fpu.fsw;
3332 env->fpuc = fpu.fcw;
3333 env->fpop = fpu.last_opcode;
3334 env->fpip = fpu.last_ip;
3335 env->fpdp = fpu.last_dp;
3336 for (i = 0; i < 8; ++i) {
3337 env->fptags[i] = !((fpu.ftwx >> i) & 1);
3338 }
3339 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
3340 for (i = 0; i < CPU_NB_REGS; i++) {
3341 env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.xmm[i][0]);
3342 env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.xmm[i][8]);
3343 }
3344 env->mxcsr = fpu.mxcsr;
3345
3346 return 0;
3347}
3348
3349static int kvm_get_xsave(X86CPU *cpu)
3350{
3351 CPUX86State *env = &cpu->env;
3352 void *xsave = env->xsave_buf;
3353 int type, ret;
3354
3355 if (!has_xsave) {
3356 return kvm_get_fpu(cpu);
3357 }
3358
3359 type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
3360 ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
3361 if (ret < 0) {
3362 return ret;
3363 }
3364 x86_cpu_xrstor_all_areas(cpu, xsave, env->xsave_buf_len);
3365
3366 return 0;
3367}
3368
3369static int kvm_get_xcrs(X86CPU *cpu)
3370{
3371 CPUX86State *env = &cpu->env;
3372 int i, ret;
3373 struct kvm_xcrs xcrs;
3374
3375 if (!has_xcrs) {
3376 return 0;
3377 }
3378
3379 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs);
3380 if (ret < 0) {
3381 return ret;
3382 }
3383
3384 for (i = 0; i < xcrs.nr_xcrs; i++) {
3385
3386 if (xcrs.xcrs[i].xcr == 0) {
3387 env->xcr0 = xcrs.xcrs[i].value;
3388 break;
3389 }
3390 }
3391 return 0;
3392}
3393
3394static int kvm_get_sregs(X86CPU *cpu)
3395{
3396 CPUX86State *env = &cpu->env;
3397 struct kvm_sregs sregs;
3398 int ret;
3399
3400 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
3401 if (ret < 0) {
3402 return ret;
3403 }
3404
3405
3406
3407
3408
3409
3410 get_seg(&env->segs[R_CS], &sregs.cs);
3411 get_seg(&env->segs[R_DS], &sregs.ds);
3412 get_seg(&env->segs[R_ES], &sregs.es);
3413 get_seg(&env->segs[R_FS], &sregs.fs);
3414 get_seg(&env->segs[R_GS], &sregs.gs);
3415 get_seg(&env->segs[R_SS], &sregs.ss);
3416
3417 get_seg(&env->tr, &sregs.tr);
3418 get_seg(&env->ldt, &sregs.ldt);
3419
3420 env->idt.limit = sregs.idt.limit;
3421 env->idt.base = sregs.idt.base;
3422 env->gdt.limit = sregs.gdt.limit;
3423 env->gdt.base = sregs.gdt.base;
3424
3425 env->cr[0] = sregs.cr0;
3426 env->cr[2] = sregs.cr2;
3427 env->cr[3] = sregs.cr3;
3428 env->cr[4] = sregs.cr4;
3429
3430 env->efer = sregs.efer;
3431
3432
3433 x86_update_hflags(env);
3434
3435 return 0;
3436}
3437
3438static int kvm_get_sregs2(X86CPU *cpu)
3439{
3440 CPUX86State *env = &cpu->env;
3441 struct kvm_sregs2 sregs;
3442 int i, ret;
3443
3444 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS2, &sregs);
3445 if (ret < 0) {
3446 return ret;
3447 }
3448
3449 get_seg(&env->segs[R_CS], &sregs.cs);
3450 get_seg(&env->segs[R_DS], &sregs.ds);
3451 get_seg(&env->segs[R_ES], &sregs.es);
3452 get_seg(&env->segs[R_FS], &sregs.fs);
3453 get_seg(&env->segs[R_GS], &sregs.gs);
3454 get_seg(&env->segs[R_SS], &sregs.ss);
3455
3456 get_seg(&env->tr, &sregs.tr);
3457 get_seg(&env->ldt, &sregs.ldt);
3458
3459 env->idt.limit = sregs.idt.limit;
3460 env->idt.base = sregs.idt.base;
3461 env->gdt.limit = sregs.gdt.limit;
3462 env->gdt.base = sregs.gdt.base;
3463
3464 env->cr[0] = sregs.cr0;
3465 env->cr[2] = sregs.cr2;
3466 env->cr[3] = sregs.cr3;
3467 env->cr[4] = sregs.cr4;
3468
3469 env->efer = sregs.efer;
3470
3471 env->pdptrs_valid = sregs.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
3472
3473 if (env->pdptrs_valid) {
3474 for (i = 0; i < 4; i++) {
3475 env->pdptrs[i] = sregs.pdptrs[i];
3476 }
3477 }
3478
3479
3480 x86_update_hflags(env);
3481
3482 return 0;
3483}
3484
3485static int kvm_get_msrs(X86CPU *cpu)
3486{
3487 CPUX86State *env = &cpu->env;
3488 struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
3489 int ret, i;
3490 uint64_t mtrr_top_bits;
3491
3492 kvm_msr_buf_reset(cpu);
3493
3494 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, 0);
3495 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, 0);
3496 kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, 0);
3497 kvm_msr_entry_add(cpu, MSR_PAT, 0);
3498 if (has_msr_star) {
3499 kvm_msr_entry_add(cpu, MSR_STAR, 0);
3500 }
3501 if (has_msr_hsave_pa) {
3502 kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, 0);
3503 }
3504 if (has_msr_tsc_aux) {
3505 kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0);
3506 }
3507 if (has_msr_tsc_adjust) {
3508 kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0);
3509 }
3510 if (has_msr_tsc_deadline) {
3511 kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0);
3512 }
3513 if (has_msr_misc_enable) {
3514 kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 0);
3515 }
3516 if (has_msr_smbase) {
3517 kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, 0);
3518 }
3519 if (has_msr_smi_count) {
3520 kvm_msr_entry_add(cpu, MSR_SMI_COUNT, 0);
3521 }
3522 if (has_msr_feature_control) {
3523 kvm_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, 0);
3524 }
3525 if (has_msr_pkrs) {
3526 kvm_msr_entry_add(cpu, MSR_IA32_PKRS, 0);
3527 }
3528 if (has_msr_bndcfgs) {
3529 kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, 0);
3530 }
3531 if (has_msr_xss) {
3532 kvm_msr_entry_add(cpu, MSR_IA32_XSS, 0);
3533 }
3534 if (has_msr_umwait) {
3535 kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, 0);
3536 }
3537 if (has_msr_spec_ctrl) {
3538 kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0);
3539 }
3540 if (has_tsc_scale_msr) {
3541 kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, 0);
3542 }
3543
3544 if (has_msr_tsx_ctrl) {
3545 kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, 0);
3546 }
3547 if (has_msr_virt_ssbd) {
3548 kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0);
3549 }
3550 if (!env->tsc_valid) {
3551 kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0);
3552 env->tsc_valid = !runstate_is_running();
3553 }
3554
3555#ifdef TARGET_X86_64
3556 if (lm_capable_kernel) {
3557 kvm_msr_entry_add(cpu, MSR_CSTAR, 0);
3558 kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0);
3559 kvm_msr_entry_add(cpu, MSR_FMASK, 0);
3560 kvm_msr_entry_add(cpu, MSR_LSTAR, 0);
3561 }
3562#endif
3563 kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0);
3564 kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, 0);
3565 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) {
3566 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, 0);
3567 }
3568 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) {
3569 kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, 0);
3570 }
3571 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) {
3572 kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, 0);
3573 }
3574 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) {
3575 kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0);
3576 }
3577 if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) {
3578 kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1);
3579 }
3580 if (has_architectural_pmu_version > 0) {
3581 if (has_architectural_pmu_version > 1) {
3582 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
3583 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
3584 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0);
3585 kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0);
3586 }
3587 for (i = 0; i < num_architectural_pmu_fixed_counters; i++) {
3588 kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
3589 }
3590 for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
3591 kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0);
3592 kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0);
3593 }
3594 }
3595
3596 if (env->mcg_cap) {
3597 kvm_msr_entry_add(cpu, MSR_MCG_STATUS, 0);
3598 kvm_msr_entry_add(cpu, MSR_MCG_CTL, 0);
3599 if (has_msr_mcg_ext_ctl) {
3600 kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, 0);
3601 }
3602 for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
3603 kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, 0);
3604 }
3605 }
3606
3607 if (has_msr_hv_hypercall) {
3608 kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 0);
3609 kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 0);
3610 }
3611 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) {
3612 kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 0);
3613 }
3614 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) {
3615 kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0);
3616 }
3617 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) {
3618 kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
3619 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
3620 kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0);
3621 }
3622 if (has_msr_hv_crash) {
3623 int j;
3624
3625 for (j = 0; j < HV_CRASH_PARAMS; j++) {
3626 kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, 0);
3627 }
3628 }
3629 if (has_msr_hv_runtime) {
3630 kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, 0);
3631 }
3632 if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) {
3633 uint32_t msr;
3634
3635 kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 0);
3636 kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 0);
3637 kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 0);
3638 for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) {
3639 kvm_msr_entry_add(cpu, msr, 0);
3640 }
3641 }
3642 if (has_msr_hv_stimer) {
3643 uint32_t msr;
3644
3645 for (msr = HV_X64_MSR_STIMER0_CONFIG; msr <= HV_X64_MSR_STIMER3_COUNT;
3646 msr++) {
3647 kvm_msr_entry_add(cpu, msr, 0);
3648 }
3649 }
3650 if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
3651 kvm_msr_entry_add(cpu, MSR_MTRRdefType, 0);
3652 kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0);
3653 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0);
3654 kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, 0);
3655 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, 0);
3656 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, 0);
3657 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, 0);
3658 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, 0);
3659 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 0);
3660 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, 0);
3661 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, 0);
3662 kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, 0);
3663 for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
3664 kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), 0);
3665 kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), 0);
3666 }
3667 }
3668
3669 if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) {
3670 int addr_num =
3671 kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7;
3672
3673 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0);
3674 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0);
3675 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0);
3676 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0);
3677 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0);
3678 for (i = 0; i < addr_num; i++) {
3679 kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0);
3680 }
3681 }
3682
3683 if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) {
3684 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0, 0);
3685 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1, 0);
3686 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2, 0);
3687 kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
3688 }
3689
3690 if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
3691 kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
3692 kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
3693 }
3694
3695 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
3696 if (ret < 0) {
3697 return ret;
3698 }
3699
3700 if (ret < cpu->kvm_msr_buf->nmsrs) {
3701 struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
3702 error_report("error: failed to get MSR 0x%" PRIx32,
3703 (uint32_t)e->index);
3704 }
3705
3706 assert(ret == cpu->kvm_msr_buf->nmsrs);
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723 if (cpu->fill_mtrr_mask) {
3724 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52);
3725 assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS);
3726 mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits);
3727 } else {
3728 mtrr_top_bits = 0;
3729 }
3730
3731 for (i = 0; i < ret; i++) {
3732 uint32_t index = msrs[i].index;
3733 switch (index) {
3734 case MSR_IA32_SYSENTER_CS:
3735 env->sysenter_cs = msrs[i].data;
3736 break;
3737 case MSR_IA32_SYSENTER_ESP:
3738 env->sysenter_esp = msrs[i].data;
3739 break;
3740 case MSR_IA32_SYSENTER_EIP:
3741 env->sysenter_eip = msrs[i].data;
3742 break;
3743 case MSR_PAT:
3744 env->pat = msrs[i].data;
3745 break;
3746 case MSR_STAR:
3747 env->star = msrs[i].data;
3748 break;
3749#ifdef TARGET_X86_64
3750 case MSR_CSTAR:
3751 env->cstar = msrs[i].data;
3752 break;
3753 case MSR_KERNELGSBASE:
3754 env->kernelgsbase = msrs[i].data;
3755 break;
3756 case MSR_FMASK:
3757 env->fmask = msrs[i].data;
3758 break;
3759 case MSR_LSTAR:
3760 env->lstar = msrs[i].data;
3761 break;
3762#endif
3763 case MSR_IA32_TSC:
3764 env->tsc = msrs[i].data;
3765 break;
3766 case MSR_TSC_AUX:
3767 env->tsc_aux = msrs[i].data;
3768 break;
3769 case MSR_TSC_ADJUST:
3770 env->tsc_adjust = msrs[i].data;
3771 break;
3772 case MSR_IA32_TSCDEADLINE:
3773 env->tsc_deadline = msrs[i].data;
3774 break;
3775 case MSR_VM_HSAVE_PA:
3776 env->vm_hsave = msrs[i].data;
3777 break;
3778 case MSR_KVM_SYSTEM_TIME:
3779 env->system_time_msr = msrs[i].data;
3780 break;
3781 case MSR_KVM_WALL_CLOCK:
3782 env->wall_clock_msr = msrs[i].data;
3783 break;
3784 case MSR_MCG_STATUS:
3785 env->mcg_status = msrs[i].data;
3786 break;
3787 case MSR_MCG_CTL:
3788 env->mcg_ctl = msrs[i].data;
3789 break;
3790 case MSR_MCG_EXT_CTL:
3791 env->mcg_ext_ctl = msrs[i].data;
3792 break;
3793 case MSR_IA32_MISC_ENABLE:
3794 env->msr_ia32_misc_enable = msrs[i].data;
3795 break;
3796 case MSR_IA32_SMBASE:
3797 env->smbase = msrs[i].data;
3798 break;
3799 case MSR_SMI_COUNT:
3800 env->msr_smi_count = msrs[i].data;
3801 break;
3802 case MSR_IA32_FEATURE_CONTROL:
3803 env->msr_ia32_feature_control = msrs[i].data;
3804 break;
3805 case MSR_IA32_BNDCFGS:
3806 env->msr_bndcfgs = msrs[i].data;
3807 break;
3808 case MSR_IA32_XSS:
3809 env->xss = msrs[i].data;
3810 break;
3811 case MSR_IA32_UMWAIT_CONTROL:
3812 env->umwait = msrs[i].data;
3813 break;
3814 case MSR_IA32_PKRS:
3815 env->pkrs = msrs[i].data;
3816 break;
3817 default:
3818 if (msrs[i].index >= MSR_MC0_CTL &&
3819 msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
3820 env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
3821 }
3822 break;
3823 case MSR_KVM_ASYNC_PF_EN:
3824 env->async_pf_en_msr = msrs[i].data;
3825 break;
3826 case MSR_KVM_ASYNC_PF_INT:
3827 env->async_pf_int_msr = msrs[i].data;
3828 break;
3829 case MSR_KVM_PV_EOI_EN:
3830 env->pv_eoi_en_msr = msrs[i].data;
3831 break;
3832 case MSR_KVM_STEAL_TIME:
3833 env->steal_time_msr = msrs[i].data;
3834 break;
3835 case MSR_KVM_POLL_CONTROL: {
3836 env->poll_control_msr = msrs[i].data;
3837 break;
3838 }
3839 case MSR_CORE_PERF_FIXED_CTR_CTRL:
3840 env->msr_fixed_ctr_ctrl = msrs[i].data;
3841 break;
3842 case MSR_CORE_PERF_GLOBAL_CTRL:
3843 env->msr_global_ctrl = msrs[i].data;
3844 break;
3845 case MSR_CORE_PERF_GLOBAL_STATUS:
3846 env->msr_global_status = msrs[i].data;
3847 break;
3848 case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
3849 env->msr_global_ovf_ctrl = msrs[i].data;
3850 break;
3851 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1:
3852 env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data;
3853 break;
3854 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
3855 env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
3856 break;
3857 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
3858 env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
3859 break;
3860 case HV_X64_MSR_HYPERCALL:
3861 env->msr_hv_hypercall = msrs[i].data;
3862 break;
3863 case HV_X64_MSR_GUEST_OS_ID:
3864 env->msr_hv_guest_os_id = msrs[i].data;
3865 break;
3866 case HV_X64_MSR_APIC_ASSIST_PAGE:
3867 env->msr_hv_vapic = msrs[i].data;
3868 break;
3869 case HV_X64_MSR_REFERENCE_TSC:
3870 env->msr_hv_tsc = msrs[i].data;
3871 break;
3872 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3873 env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data;
3874 break;
3875 case HV_X64_MSR_VP_RUNTIME:
3876 env->msr_hv_runtime = msrs[i].data;
3877 break;
3878 case HV_X64_MSR_SCONTROL:
3879 env->msr_hv_synic_control = msrs[i].data;
3880 break;
3881 case HV_X64_MSR_SIEFP:
3882 env->msr_hv_synic_evt_page = msrs[i].data;
3883 break;
3884 case HV_X64_MSR_SIMP:
3885 env->msr_hv_synic_msg_page = msrs[i].data;
3886 break;
3887 case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
3888 env->msr_hv_synic_sint[index - HV_X64_MSR_SINT0] = msrs[i].data;
3889 break;
3890 case HV_X64_MSR_STIMER0_CONFIG:
3891 case HV_X64_MSR_STIMER1_CONFIG:
3892 case HV_X64_MSR_STIMER2_CONFIG:
3893 case HV_X64_MSR_STIMER3_CONFIG:
3894 env->msr_hv_stimer_config[(index - HV_X64_MSR_STIMER0_CONFIG)/2] =
3895 msrs[i].data;
3896 break;
3897 case HV_X64_MSR_STIMER0_COUNT:
3898 case HV_X64_MSR_STIMER1_COUNT:
3899 case HV_X64_MSR_STIMER2_COUNT:
3900 case HV_X64_MSR_STIMER3_COUNT:
3901 env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] =
3902 msrs[i].data;
3903 break;
3904 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3905 env->msr_hv_reenlightenment_control = msrs[i].data;
3906 break;
3907 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3908 env->msr_hv_tsc_emulation_control = msrs[i].data;
3909 break;
3910 case HV_X64_MSR_TSC_EMULATION_STATUS:
3911 env->msr_hv_tsc_emulation_status = msrs[i].data;
3912 break;
3913 case MSR_MTRRdefType:
3914 env->mtrr_deftype = msrs[i].data;
3915 break;
3916 case MSR_MTRRfix64K_00000:
3917 env->mtrr_fixed[0] = msrs[i].data;
3918 break;
3919 case MSR_MTRRfix16K_80000:
3920 env->mtrr_fixed[1] = msrs[i].data;
3921 break;
3922 case MSR_MTRRfix16K_A0000:
3923 env->mtrr_fixed[2] = msrs[i].data;
3924 break;
3925 case MSR_MTRRfix4K_C0000:
3926 env->mtrr_fixed[3] = msrs[i].data;
3927 break;
3928 case MSR_MTRRfix4K_C8000:
3929 env->mtrr_fixed[4] = msrs[i].data;
3930 break;
3931 case MSR_MTRRfix4K_D0000:
3932 env->mtrr_fixed[5] = msrs[i].data;
3933 break;
3934 case MSR_MTRRfix4K_D8000:
3935 env->mtrr_fixed[6] = msrs[i].data;
3936 break;
3937 case MSR_MTRRfix4K_E0000:
3938 env->mtrr_fixed[7] = msrs[i].data;
3939 break;
3940 case MSR_MTRRfix4K_E8000:
3941 env->mtrr_fixed[8] = msrs[i].data;
3942 break;
3943 case MSR_MTRRfix4K_F0000:
3944 env->mtrr_fixed[9] = msrs[i].data;
3945 break;
3946 case MSR_MTRRfix4K_F8000:
3947 env->mtrr_fixed[10] = msrs[i].data;
3948 break;
3949 case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
3950 if (index & 1) {
3951 env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data |
3952 mtrr_top_bits;
3953 } else {
3954 env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
3955 }
3956 break;
3957 case MSR_IA32_SPEC_CTRL:
3958 env->spec_ctrl = msrs[i].data;
3959 break;
3960 case MSR_AMD64_TSC_RATIO:
3961 env->amd_tsc_scale_msr = msrs[i].data;
3962 break;
3963 case MSR_IA32_TSX_CTRL:
3964 env->tsx_ctrl = msrs[i].data;
3965 break;
3966 case MSR_VIRT_SSBD:
3967 env->virt_ssbd = msrs[i].data;
3968 break;
3969 case MSR_IA32_RTIT_CTL:
3970 env->msr_rtit_ctrl = msrs[i].data;
3971 break;
3972 case MSR_IA32_RTIT_STATUS:
3973 env->msr_rtit_status = msrs[i].data;
3974 break;
3975 case MSR_IA32_RTIT_OUTPUT_BASE:
3976 env->msr_rtit_output_base = msrs[i].data;
3977 break;
3978 case MSR_IA32_RTIT_OUTPUT_MASK:
3979 env->msr_rtit_output_mask = msrs[i].data;
3980 break;
3981 case MSR_IA32_RTIT_CR3_MATCH:
3982 env->msr_rtit_cr3_match = msrs[i].data;
3983 break;
3984 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
3985 env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data;
3986 break;
3987 case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3:
3988 env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
3989 msrs[i].data;
3990 break;
3991 case MSR_IA32_XFD:
3992 env->msr_xfd = msrs[i].data;
3993 break;
3994 case MSR_IA32_XFD_ERR:
3995 env->msr_xfd_err = msrs[i].data;
3996 break;
3997 }
3998 }
3999
4000 return 0;
4001}
4002
4003static int kvm_put_mp_state(X86CPU *cpu)
4004{
4005 struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state };
4006
4007 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
4008}
4009
4010static int kvm_get_mp_state(X86CPU *cpu)
4011{
4012 CPUState *cs = CPU(cpu);
4013 CPUX86State *env = &cpu->env;
4014 struct kvm_mp_state mp_state;
4015 int ret;
4016
4017 ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state);
4018 if (ret < 0) {
4019 return ret;
4020 }
4021 env->mp_state = mp_state.mp_state;
4022 if (kvm_irqchip_in_kernel()) {
4023 cs->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED);
4024 }
4025 return 0;
4026}
4027
4028static int kvm_get_apic(X86CPU *cpu)
4029{
4030 DeviceState *apic = cpu->apic_state;
4031 struct kvm_lapic_state kapic;
4032 int ret;
4033
4034 if (apic && kvm_irqchip_in_kernel()) {
4035 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic);
4036 if (ret < 0) {
4037 return ret;
4038 }
4039
4040 kvm_get_apic_state(apic, &kapic);
4041 }
4042 return 0;
4043}
4044
4045static int kvm_put_vcpu_events(X86CPU *cpu, int level)
4046{
4047 CPUState *cs = CPU(cpu);
4048 CPUX86State *env = &cpu->env;
4049 struct kvm_vcpu_events events = {};
4050
4051 if (!kvm_has_vcpu_events()) {
4052 return 0;
4053 }
4054
4055 events.flags = 0;
4056
4057 if (has_exception_payload) {
4058 events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
4059 events.exception.pending = env->exception_pending;
4060 events.exception_has_payload = env->exception_has_payload;
4061 events.exception_payload = env->exception_payload;
4062 }
4063 events.exception.nr = env->exception_nr;
4064 events.exception.injected = env->exception_injected;
4065 events.exception.has_error_code = env->has_error_code;
4066 events.exception.error_code = env->error_code;
4067
4068 events.interrupt.injected = (env->interrupt_injected >= 0);
4069 events.interrupt.nr = env->interrupt_injected;
4070 events.interrupt.soft = env->soft_interrupt;
4071
4072 events.nmi.injected = env->nmi_injected;
4073 events.nmi.pending = env->nmi_pending;
4074 events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK);
4075
4076 events.sipi_vector = env->sipi_vector;
4077
4078 if (has_msr_smbase) {
4079 events.smi.smm = !!(env->hflags & HF_SMM_MASK);
4080 events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK);
4081 if (kvm_irqchip_in_kernel()) {
4082
4083
4084
4085 events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI;
4086 events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT;
4087 cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI);
4088 } else {
4089
4090 events.smi.pending = 0;
4091 events.smi.latched_init = 0;
4092 }
4093
4094
4095
4096 if (!cpu->kvm_no_smi_migration) {
4097 events.flags |= KVM_VCPUEVENT_VALID_SMM;
4098 }
4099 }
4100
4101 if (level >= KVM_PUT_RESET_STATE) {
4102 events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
4103 if (env->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
4104 events.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR;
4105 }
4106 }
4107
4108 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events);
4109}
4110
4111static int kvm_get_vcpu_events(X86CPU *cpu)
4112{
4113 CPUX86State *env = &cpu->env;
4114 struct kvm_vcpu_events events;
4115 int ret;
4116
4117 if (!kvm_has_vcpu_events()) {
4118 return 0;
4119 }
4120
4121 memset(&events, 0, sizeof(events));
4122 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events);
4123 if (ret < 0) {
4124 return ret;
4125 }
4126
4127 if (events.flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
4128 env->exception_pending = events.exception.pending;
4129 env->exception_has_payload = events.exception_has_payload;
4130 env->exception_payload = events.exception_payload;
4131 } else {
4132 env->exception_pending = 0;
4133 env->exception_has_payload = false;
4134 }
4135 env->exception_injected = events.exception.injected;
4136 env->exception_nr =
4137 (env->exception_pending || env->exception_injected) ?
4138 events.exception.nr : -1;
4139 env->has_error_code = events.exception.has_error_code;
4140 env->error_code = events.exception.error_code;
4141
4142 env->interrupt_injected =
4143 events.interrupt.injected ? events.interrupt.nr : -1;
4144 env->soft_interrupt = events.interrupt.soft;
4145
4146 env->nmi_injected = events.nmi.injected;
4147 env->nmi_pending = events.nmi.pending;
4148 if (events.nmi.masked) {
4149 env->hflags2 |= HF2_NMI_MASK;
4150 } else {
4151 env->hflags2 &= ~HF2_NMI_MASK;
4152 }
4153
4154 if (events.flags & KVM_VCPUEVENT_VALID_SMM) {
4155 if (events.smi.smm) {
4156 env->hflags |= HF_SMM_MASK;
4157 } else {
4158 env->hflags &= ~HF_SMM_MASK;
4159 }
4160 if (events.smi.pending) {
4161 cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
4162 } else {
4163 cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI);
4164 }
4165 if (events.smi.smm_inside_nmi) {
4166 env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK;
4167 } else {
4168 env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK;
4169 }
4170 if (events.smi.latched_init) {
4171 cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
4172 } else {
4173 cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT);
4174 }
4175 }
4176
4177 env->sipi_vector = events.sipi_vector;
4178
4179 return 0;
4180}
4181
4182static int kvm_guest_debug_workarounds(X86CPU *cpu)
4183{
4184 CPUState *cs = CPU(cpu);
4185 CPUX86State *env = &cpu->env;
4186 int ret = 0;
4187 unsigned long reinject_trap = 0;
4188
4189 if (!kvm_has_vcpu_events()) {
4190 if (env->exception_nr == EXCP01_DB) {
4191 reinject_trap = KVM_GUESTDBG_INJECT_DB;
4192 } else if (env->exception_injected == EXCP03_INT3) {
4193 reinject_trap = KVM_GUESTDBG_INJECT_BP;
4194 }
4195 kvm_reset_exception(env);
4196 }
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206 if (reinject_trap ||
4207 (!kvm_has_robust_singlestep() && cs->singlestep_enabled)) {
4208 ret = kvm_update_guest_debug(cs, reinject_trap);
4209 }
4210 return ret;
4211}
4212
4213static int kvm_put_debugregs(X86CPU *cpu)
4214{
4215 CPUX86State *env = &cpu->env;
4216 struct kvm_debugregs dbgregs;
4217 int i;
4218
4219 if (!kvm_has_debugregs()) {
4220 return 0;
4221 }
4222
4223 memset(&dbgregs, 0, sizeof(dbgregs));
4224 for (i = 0; i < 4; i++) {
4225 dbgregs.db[i] = env->dr[i];
4226 }
4227 dbgregs.dr6 = env->dr[6];
4228 dbgregs.dr7 = env->dr[7];
4229 dbgregs.flags = 0;
4230
4231 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs);
4232}
4233
4234static int kvm_get_debugregs(X86CPU *cpu)
4235{
4236 CPUX86State *env = &cpu->env;
4237 struct kvm_debugregs dbgregs;
4238 int i, ret;
4239
4240 if (!kvm_has_debugregs()) {
4241 return 0;
4242 }
4243
4244 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs);
4245 if (ret < 0) {
4246 return ret;
4247 }
4248 for (i = 0; i < 4; i++) {
4249 env->dr[i] = dbgregs.db[i];
4250 }
4251 env->dr[4] = env->dr[6] = dbgregs.dr6;
4252 env->dr[5] = env->dr[7] = dbgregs.dr7;
4253
4254 return 0;
4255}
4256
4257static int kvm_put_nested_state(X86CPU *cpu)
4258{
4259 CPUX86State *env = &cpu->env;
4260 int max_nested_state_len = kvm_max_nested_state_length();
4261
4262 if (!env->nested_state) {
4263 return 0;
4264 }
4265
4266
4267
4268
4269 if (env->hflags & HF_GUEST_MASK) {
4270 env->nested_state->flags |= KVM_STATE_NESTED_GUEST_MODE;
4271 } else {
4272 env->nested_state->flags &= ~KVM_STATE_NESTED_GUEST_MODE;
4273 }
4274
4275
4276 if (cpu_has_svm(env) && (env->hflags2 & HF2_GIF_MASK)) {
4277 env->nested_state->flags |= KVM_STATE_NESTED_GIF_SET;
4278 } else {
4279 env->nested_state->flags &= ~KVM_STATE_NESTED_GIF_SET;
4280 }
4281
4282 assert(env->nested_state->size <= max_nested_state_len);
4283 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state);
4284}
4285
4286static int kvm_get_nested_state(X86CPU *cpu)
4287{
4288 CPUX86State *env = &cpu->env;
4289 int max_nested_state_len = kvm_max_nested_state_length();
4290 int ret;
4291
4292 if (!env->nested_state) {
4293 return 0;
4294 }
4295
4296
4297
4298
4299
4300
4301
4302
4303 env->nested_state->size = max_nested_state_len;
4304
4305 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state);
4306 if (ret < 0) {
4307 return ret;
4308 }
4309
4310
4311
4312
4313 if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) {
4314 env->hflags |= HF_GUEST_MASK;
4315 } else {
4316 env->hflags &= ~HF_GUEST_MASK;
4317 }
4318
4319
4320 if (cpu_has_svm(env)) {
4321 if (env->nested_state->flags & KVM_STATE_NESTED_GIF_SET) {
4322 env->hflags2 |= HF2_GIF_MASK;
4323 } else {
4324 env->hflags2 &= ~HF2_GIF_MASK;
4325 }
4326 }
4327
4328 return ret;
4329}
4330
4331int kvm_arch_put_registers(CPUState *cpu, int level)
4332{
4333 X86CPU *x86_cpu = X86_CPU(cpu);
4334 int ret;
4335
4336 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
4337
4338
4339 ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu);
4340 if (ret < 0) {
4341 return ret;
4342 }
4343
4344 if (level >= KVM_PUT_RESET_STATE) {
4345 ret = kvm_put_nested_state(x86_cpu);
4346 if (ret < 0) {
4347 return ret;
4348 }
4349
4350 ret = kvm_put_msr_feature_control(x86_cpu);
4351 if (ret < 0) {
4352 return ret;
4353 }
4354 }
4355
4356 if (level == KVM_PUT_FULL_STATE) {
4357
4358
4359
4360
4361
4362 kvm_arch_set_tsc_khz(cpu);
4363 }
4364
4365 ret = kvm_getput_regs(x86_cpu, 1);
4366 if (ret < 0) {
4367 return ret;
4368 }
4369 ret = kvm_put_xsave(x86_cpu);
4370 if (ret < 0) {
4371 return ret;
4372 }
4373 ret = kvm_put_xcrs(x86_cpu);
4374 if (ret < 0) {
4375 return ret;
4376 }
4377
4378 ret = kvm_inject_mce_oldstyle(x86_cpu);
4379 if (ret < 0) {
4380 return ret;
4381 }
4382 ret = kvm_put_msrs(x86_cpu, level);
4383 if (ret < 0) {
4384 return ret;
4385 }
4386 ret = kvm_put_vcpu_events(x86_cpu, level);
4387 if (ret < 0) {
4388 return ret;
4389 }
4390 if (level >= KVM_PUT_RESET_STATE) {
4391 ret = kvm_put_mp_state(x86_cpu);
4392 if (ret < 0) {
4393 return ret;
4394 }
4395 }
4396
4397 ret = kvm_put_tscdeadline_msr(x86_cpu);
4398 if (ret < 0) {
4399 return ret;
4400 }
4401 ret = kvm_put_debugregs(x86_cpu);
4402 if (ret < 0) {
4403 return ret;
4404 }
4405
4406 ret = kvm_guest_debug_workarounds(x86_cpu);
4407 if (ret < 0) {
4408 return ret;
4409 }
4410 return 0;
4411}
4412
4413int kvm_arch_get_registers(CPUState *cs)
4414{
4415 X86CPU *cpu = X86_CPU(cs);
4416 int ret;
4417
4418 assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs));
4419
4420 ret = kvm_get_vcpu_events(cpu);
4421 if (ret < 0) {
4422 goto out;
4423 }
4424
4425
4426
4427
4428 ret = kvm_get_mp_state(cpu);
4429 if (ret < 0) {
4430 goto out;
4431 }
4432 ret = kvm_getput_regs(cpu, 0);
4433 if (ret < 0) {
4434 goto out;
4435 }
4436 ret = kvm_get_xsave(cpu);
4437 if (ret < 0) {
4438 goto out;
4439 }
4440 ret = kvm_get_xcrs(cpu);
4441 if (ret < 0) {
4442 goto out;
4443 }
4444 ret = has_sregs2 ? kvm_get_sregs2(cpu) : kvm_get_sregs(cpu);
4445 if (ret < 0) {
4446 goto out;
4447 }
4448 ret = kvm_get_msrs(cpu);
4449 if (ret < 0) {
4450 goto out;
4451 }
4452 ret = kvm_get_apic(cpu);
4453 if (ret < 0) {
4454 goto out;
4455 }
4456 ret = kvm_get_debugregs(cpu);
4457 if (ret < 0) {
4458 goto out;
4459 }
4460 ret = kvm_get_nested_state(cpu);
4461 if (ret < 0) {
4462 goto out;
4463 }
4464 ret = 0;
4465 out:
4466 cpu_sync_bndcs_hflags(&cpu->env);
4467 return ret;
4468}
4469
4470void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
4471{
4472 X86CPU *x86_cpu = X86_CPU(cpu);
4473 CPUX86State *env = &x86_cpu->env;
4474 int ret;
4475
4476
4477 if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
4478 if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
4479 qemu_mutex_lock_iothread();
4480 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
4481 qemu_mutex_unlock_iothread();
4482 DPRINTF("injected NMI\n");
4483 ret = kvm_vcpu_ioctl(cpu, KVM_NMI);
4484 if (ret < 0) {
4485 fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
4486 strerror(-ret));
4487 }
4488 }
4489 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
4490 qemu_mutex_lock_iothread();
4491 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
4492 qemu_mutex_unlock_iothread();
4493 DPRINTF("injected SMI\n");
4494 ret = kvm_vcpu_ioctl(cpu, KVM_SMI);
4495 if (ret < 0) {
4496 fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n",
4497 strerror(-ret));
4498 }
4499 }
4500 }
4501
4502 if (!kvm_pic_in_kernel()) {
4503 qemu_mutex_lock_iothread();
4504 }
4505
4506
4507
4508
4509
4510 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
4511 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
4512 !(env->hflags & HF_SMM_MASK)) {
4513 cpu->exit_request = 1;
4514 }
4515 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
4516 cpu->exit_request = 1;
4517 }
4518 }
4519
4520 if (!kvm_pic_in_kernel()) {
4521
4522 if (run->ready_for_interrupt_injection &&
4523 (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
4524 (env->eflags & IF_MASK)) {
4525 int irq;
4526
4527 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
4528 irq = cpu_get_pic_interrupt(env);
4529 if (irq >= 0) {
4530 struct kvm_interrupt intr;
4531
4532 intr.irq = irq;
4533 DPRINTF("injected interrupt %d\n", irq);
4534 ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
4535 if (ret < 0) {
4536 fprintf(stderr,
4537 "KVM: injection failed, interrupt lost (%s)\n",
4538 strerror(-ret));
4539 }
4540 }
4541 }
4542
4543
4544
4545
4546
4547 if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
4548 run->request_interrupt_window = 1;
4549 } else {
4550 run->request_interrupt_window = 0;
4551 }
4552
4553 DPRINTF("setting tpr\n");
4554 run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
4555
4556 qemu_mutex_unlock_iothread();
4557 }
4558}
4559
4560static void kvm_rate_limit_on_bus_lock(void)
4561{
4562 uint64_t delay_ns = ratelimit_calculate_delay(&bus_lock_ratelimit_ctrl, 1);
4563
4564 if (delay_ns) {
4565 g_usleep(delay_ns / SCALE_US);
4566 }
4567}
4568
4569MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
4570{
4571 X86CPU *x86_cpu = X86_CPU(cpu);
4572 CPUX86State *env = &x86_cpu->env;
4573
4574 if (run->flags & KVM_RUN_X86_SMM) {
4575 env->hflags |= HF_SMM_MASK;
4576 } else {
4577 env->hflags &= ~HF_SMM_MASK;
4578 }
4579 if (run->if_flag) {
4580 env->eflags |= IF_MASK;
4581 } else {
4582 env->eflags &= ~IF_MASK;
4583 }
4584 if (run->flags & KVM_RUN_X86_BUS_LOCK) {
4585 kvm_rate_limit_on_bus_lock();
4586 }
4587
4588
4589
4590 if (!kvm_irqchip_in_kernel()) {
4591 qemu_mutex_lock_iothread();
4592 }
4593 cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
4594 cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
4595 if (!kvm_irqchip_in_kernel()) {
4596 qemu_mutex_unlock_iothread();
4597 }
4598 return cpu_get_mem_attrs(env);
4599}
4600
4601int kvm_arch_process_async_events(CPUState *cs)
4602{
4603 X86CPU *cpu = X86_CPU(cs);
4604 CPUX86State *env = &cpu->env;
4605
4606 if (cs->interrupt_request & CPU_INTERRUPT_MCE) {
4607
4608 assert(env->mcg_cap);
4609
4610 cs->interrupt_request &= ~CPU_INTERRUPT_MCE;
4611
4612 kvm_cpu_synchronize_state(cs);
4613
4614 if (env->exception_nr == EXCP08_DBLE) {
4615
4616 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
4617 cs->exit_request = 1;
4618 return 0;
4619 }
4620 kvm_queue_exception(env, EXCP12_MCHK, 0, 0);
4621 env->has_error_code = 0;
4622
4623 cs->halted = 0;
4624 if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) {
4625 env->mp_state = KVM_MP_STATE_RUNNABLE;
4626 }
4627 }
4628
4629 if ((cs->interrupt_request & CPU_INTERRUPT_INIT) &&
4630 !(env->hflags & HF_SMM_MASK)) {
4631 kvm_cpu_synchronize_state(cs);
4632 do_cpu_init(cpu);
4633 }
4634
4635 if (kvm_irqchip_in_kernel()) {
4636 return 0;
4637 }
4638
4639 if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
4640 cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
4641 apic_poll_irq(cpu->apic_state);
4642 }
4643 if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
4644 (env->eflags & IF_MASK)) ||
4645 (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
4646 cs->halted = 0;
4647 }
4648 if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
4649 kvm_cpu_synchronize_state(cs);
4650 do_cpu_sipi(cpu);
4651 }
4652 if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
4653 cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
4654 kvm_cpu_synchronize_state(cs);
4655 apic_handle_tpr_access_report(cpu->apic_state, env->eip,
4656 env->tpr_access_type);
4657 }
4658
4659 return cs->halted;
4660}
4661
4662static int kvm_handle_halt(X86CPU *cpu)
4663{
4664 CPUState *cs = CPU(cpu);
4665 CPUX86State *env = &cpu->env;
4666
4667 if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
4668 (env->eflags & IF_MASK)) &&
4669 !(cs->interrupt_request & CPU_INTERRUPT_NMI)) {
4670 cs->halted = 1;
4671 return EXCP_HLT;
4672 }
4673
4674 return 0;
4675}
4676
4677static int kvm_handle_tpr_access(X86CPU *cpu)
4678{
4679 CPUState *cs = CPU(cpu);
4680 struct kvm_run *run = cs->kvm_run;
4681
4682 apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip,
4683 run->tpr_access.is_write ? TPR_ACCESS_WRITE
4684 : TPR_ACCESS_READ);
4685 return 1;
4686}
4687
4688int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
4689{
4690 static const uint8_t int3 = 0xcc;
4691
4692 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
4693 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) {
4694 return -EINVAL;
4695 }
4696 return 0;
4697}
4698
4699int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
4700{
4701 uint8_t int3;
4702
4703 if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0)) {
4704 return -EINVAL;
4705 }
4706 if (int3 != 0xcc) {
4707 return 0;
4708 }
4709 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) {
4710 return -EINVAL;
4711 }
4712 return 0;
4713}
4714
/*
 * Table of hardware breakpoints/watchpoints requested by the debugger.
 * Only the first nb_hw_breakpoint entries are in use; the table has four
 * slots.
 */
static struct {
    target_ulong addr;  /* guest address being watched */
    int len;            /* length in bytes (1, 2, 4 or 8) */
    int type;           /* GDB_BREAKPOINT_HW or GDB_WATCHPOINT_* */
} hw_breakpoint[4];

/* Number of valid entries at the start of hw_breakpoint[]. */
static int nb_hw_breakpoint;
4722
4723static int find_hw_breakpoint(target_ulong addr, int len, int type)
4724{
4725 int n;
4726
4727 for (n = 0; n < nb_hw_breakpoint; n++) {
4728 if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
4729 (hw_breakpoint[n].len == len || len == -1)) {
4730 return n;
4731 }
4732 }
4733 return -1;
4734}
4735
4736int kvm_arch_insert_hw_breakpoint(target_ulong addr,
4737 target_ulong len, int type)
4738{
4739 switch (type) {
4740 case GDB_BREAKPOINT_HW:
4741 len = 1;
4742 break;
4743 case GDB_WATCHPOINT_WRITE:
4744 case GDB_WATCHPOINT_ACCESS:
4745 switch (len) {
4746 case 1:
4747 break;
4748 case 2:
4749 case 4:
4750 case 8:
4751 if (addr & (len - 1)) {
4752 return -EINVAL;
4753 }
4754 break;
4755 default:
4756 return -EINVAL;
4757 }
4758 break;
4759 default:
4760 return -ENOSYS;
4761 }
4762
4763 if (nb_hw_breakpoint == 4) {
4764 return -ENOBUFS;
4765 }
4766 if (find_hw_breakpoint(addr, len, type) >= 0) {
4767 return -EEXIST;
4768 }
4769 hw_breakpoint[nb_hw_breakpoint].addr = addr;
4770 hw_breakpoint[nb_hw_breakpoint].len = len;
4771 hw_breakpoint[nb_hw_breakpoint].type = type;
4772 nb_hw_breakpoint++;
4773
4774 return 0;
4775}
4776
4777int kvm_arch_remove_hw_breakpoint(target_ulong addr,
4778 target_ulong len, int type)
4779{
4780 int n;
4781
4782 n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
4783 if (n < 0) {
4784 return -ENOENT;
4785 }
4786 nb_hw_breakpoint--;
4787 hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];
4788
4789 return 0;
4790}
4791
4792void kvm_arch_remove_all_hw_breakpoints(void)
4793{
4794 nb_hw_breakpoint = 0;
4795}
4796
/*
 * Scratch watchpoint descriptor: kvm_handle_debug() fills it in and points
 * cs->watchpoint_hit at it when a hardware watchpoint triggers.
 */
static CPUWatchpoint hw_watchpoint;
4798
4799static int kvm_handle_debug(X86CPU *cpu,
4800 struct kvm_debug_exit_arch *arch_info)
4801{
4802 CPUState *cs = CPU(cpu);
4803 CPUX86State *env = &cpu->env;
4804 int ret = 0;
4805 int n;
4806
4807 if (arch_info->exception == EXCP01_DB) {
4808 if (arch_info->dr6 & DR6_BS) {
4809 if (cs->singlestep_enabled) {
4810 ret = EXCP_DEBUG;
4811 }
4812 } else {
4813 for (n = 0; n < 4; n++) {
4814 if (arch_info->dr6 & (1 << n)) {
4815 switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
4816 case 0x0:
4817 ret = EXCP_DEBUG;
4818 break;
4819 case 0x1:
4820 ret = EXCP_DEBUG;
4821 cs->watchpoint_hit = &hw_watchpoint;
4822 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
4823 hw_watchpoint.flags = BP_MEM_WRITE;
4824 break;
4825 case 0x3:
4826 ret = EXCP_DEBUG;
4827 cs->watchpoint_hit = &hw_watchpoint;
4828 hw_watchpoint.vaddr = hw_breakpoint[n].addr;
4829 hw_watchpoint.flags = BP_MEM_ACCESS;
4830 break;
4831 }
4832 }
4833 }
4834 }
4835 } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) {
4836 ret = EXCP_DEBUG;
4837 }
4838 if (ret == 0) {
4839 cpu_synchronize_state(cs);
4840 assert(env->exception_nr == -1);
4841
4842
4843 kvm_queue_exception(env, arch_info->exception,
4844 arch_info->exception == EXCP01_DB,
4845 arch_info->dr6);
4846 env->has_error_code = 0;
4847 }
4848
4849 return ret;
4850}
4851
4852void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
4853{
4854 const uint8_t type_code[] = {
4855 [GDB_BREAKPOINT_HW] = 0x0,
4856 [GDB_WATCHPOINT_WRITE] = 0x1,
4857 [GDB_WATCHPOINT_ACCESS] = 0x3
4858 };
4859 const uint8_t len_code[] = {
4860 [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
4861 };
4862 int n;
4863
4864 if (kvm_sw_breakpoints_active(cpu)) {
4865 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
4866 }
4867 if (nb_hw_breakpoint > 0) {
4868 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
4869 dbg->arch.debugreg[7] = 0x0600;
4870 for (n = 0; n < nb_hw_breakpoint; n++) {
4871 dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
4872 dbg->arch.debugreg[7] |= (2 << (n * 2)) |
4873 (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
4874 ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4));
4875 }
4876 }
4877}
4878
4879static bool has_sgx_provisioning;
4880
4881static bool __kvm_enable_sgx_provisioning(KVMState *s)
4882{
4883 int fd, ret;
4884
4885 if (!kvm_vm_check_extension(s, KVM_CAP_SGX_ATTRIBUTE)) {
4886 return false;
4887 }
4888
4889 fd = qemu_open_old("/dev/sgx_provision", O_RDONLY);
4890 if (fd < 0) {
4891 return false;
4892 }
4893
4894 ret = kvm_vm_enable_cap(s, KVM_CAP_SGX_ATTRIBUTE, 0, fd);
4895 if (ret) {
4896 error_report("Could not enable SGX PROVISIONKEY: %s", strerror(-ret));
4897 exit(1);
4898 }
4899 close(fd);
4900 return true;
4901}
4902
4903bool kvm_enable_sgx_provisioning(KVMState *s)
4904{
4905 return MEMORIZE(__kvm_enable_sgx_provisioning(s), has_sgx_provisioning);
4906}
4907
4908static bool host_supports_vmx(void)
4909{
4910 uint32_t ecx, unused;
4911
4912 host_cpuid(1, 0, &unused, &unused, &ecx, &unused);
4913 return ecx & CPUID_EXT_VMX;
4914}
4915
4916#define VMX_INVALID_GUEST_STATE 0x80000021
4917
4918int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
4919{
4920 X86CPU *cpu = X86_CPU(cs);
4921 uint64_t code;
4922 int ret;
4923
4924 switch (run->exit_reason) {
4925 case KVM_EXIT_HLT:
4926 DPRINTF("handle_hlt\n");
4927 qemu_mutex_lock_iothread();
4928 ret = kvm_handle_halt(cpu);
4929 qemu_mutex_unlock_iothread();
4930 break;
4931 case KVM_EXIT_SET_TPR:
4932 ret = 0;
4933 break;
4934 case KVM_EXIT_TPR_ACCESS:
4935 qemu_mutex_lock_iothread();
4936 ret = kvm_handle_tpr_access(cpu);
4937 qemu_mutex_unlock_iothread();
4938 break;
4939 case KVM_EXIT_FAIL_ENTRY:
4940 code = run->fail_entry.hardware_entry_failure_reason;
4941 fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n",
4942 code);
4943 if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) {
4944 fprintf(stderr,
4945 "\nIf you're running a guest on an Intel machine without "
4946 "unrestricted mode\n"
4947 "support, the failure can be most likely due to the guest "
4948 "entering an invalid\n"
4949 "state for Intel VT. For example, the guest maybe running "
4950 "in big real mode\n"
4951 "which is not supported on less recent Intel processors."
4952 "\n\n");
4953 }
4954 ret = -1;
4955 break;
4956 case KVM_EXIT_EXCEPTION:
4957 fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n",
4958 run->ex.exception, run->ex.error_code);
4959 ret = -1;
4960 break;
4961 case KVM_EXIT_DEBUG:
4962 DPRINTF("kvm_exit_debug\n");
4963 qemu_mutex_lock_iothread();
4964 ret = kvm_handle_debug(cpu, &run->debug.arch);
4965 qemu_mutex_unlock_iothread();
4966 break;
4967 case KVM_EXIT_HYPERV:
4968 ret = kvm_hv_handle_exit(cpu, &run->hyperv);
4969 break;
4970 case KVM_EXIT_IOAPIC_EOI:
4971 ioapic_eoi_broadcast(run->eoi.vector);
4972 ret = 0;
4973 break;
4974 case KVM_EXIT_X86_BUS_LOCK:
4975
4976 ret = 0;
4977 break;
4978 default:
4979 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
4980 ret = -1;
4981 break;
4982 }
4983
4984 return ret;
4985}
4986
4987bool kvm_arch_stop_on_emulation_error(CPUState *cs)
4988{
4989 X86CPU *cpu = X86_CPU(cs);
4990 CPUX86State *env = &cpu->env;
4991
4992 kvm_cpu_synchronize_state(cs);
4993 return !(env->cr[0] & CR0_PE_MASK) ||
4994 ((env->segs[R_CS].selector & 3) != 3);
4995}
4996
4997void kvm_arch_init_irq_routing(KVMState *s)
4998{
4999
5000
5001
5002
5003 kvm_msi_via_irqfd_allowed = true;
5004 kvm_gsi_routing_allowed = true;
5005
5006 if (kvm_irqchip_is_split()) {
5007 KVMRouteChange c = kvm_irqchip_begin_route_changes(s);
5008 int i;
5009
5010
5011
5012 for (i = 0; i < IOAPIC_NUM_PINS; i++) {
5013 if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) {
5014 error_report("Could not enable split IRQ mode.");
5015 exit(1);
5016 }
5017 }
5018 kvm_irqchip_commit_route_changes(&c);
5019 }
5020}
5021
5022int kvm_arch_irqchip_create(KVMState *s)
5023{
5024 int ret;
5025 if (kvm_kernel_irqchip_split()) {
5026 ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24);
5027 if (ret) {
5028 error_report("Could not enable split irqchip mode: %s",
5029 strerror(-ret));
5030 exit(1);
5031 } else {
5032 DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n");
5033 kvm_split_irqchip = true;
5034 return 1;
5035 }
5036 } else {
5037 return 0;
5038 }
5039}
5040
5041uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address)
5042{
5043 CPUX86State *env;
5044 uint64_t ext_id;
5045
5046 if (!first_cpu) {
5047 return address;
5048 }
5049 env = &X86_CPU(first_cpu)->env;
5050 if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) {
5051 return address;
5052 }
5053
5054
5055
5056
5057
5058
5059 ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT);
5060 if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) {
5061 return address;
5062 }
5063
5064 address &= ~ext_id;
5065 address |= ext_id << 35;
5066 return address;
5067}
5068
5069int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
5070 uint64_t address, uint32_t data, PCIDevice *dev)
5071{
5072 X86IOMMUState *iommu = x86_iommu_get_default();
5073
5074 if (iommu) {
5075 X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu);
5076
5077 if (class->int_remap) {
5078 int ret;
5079 MSIMessage src, dst;
5080
5081 src.address = route->u.msi.address_hi;
5082 src.address <<= VTD_MSI_ADDR_HI_SHIFT;
5083 src.address |= route->u.msi.address_lo;
5084 src.data = route->u.msi.data;
5085
5086 ret = class->int_remap(iommu, &src, &dst, dev ? \
5087 pci_requester_id(dev) : \
5088 X86_IOMMU_SID_INVALID);
5089 if (ret) {
5090 trace_kvm_x86_fixup_msi_error(route->gsi);
5091 return 1;
5092 }
5093
5094
5095
5096
5097 dst.address = kvm_swizzle_msi_ext_dest_id(dst.address);
5098
5099 route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT;
5100 route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK;
5101 route->u.msi.data = dst.data;
5102 return 0;
5103 }
5104 }
5105
5106 address = kvm_swizzle_msi_ext_dest_id(address);
5107 route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
5108 route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
5109 return 0;
5110}
5111
5112typedef struct MSIRouteEntry MSIRouteEntry;
5113
5114struct MSIRouteEntry {
5115 PCIDevice *dev;
5116 int vector;
5117 int virq;
5118 QLIST_ENTRY(MSIRouteEntry) list;
5119};
5120
5121
5122static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
5123 QLIST_HEAD_INITIALIZER(msi_route_list);
5124
5125static void kvm_update_msi_routes_all(void *private, bool global,
5126 uint32_t index, uint32_t mask)
5127{
5128 int cnt = 0, vector;
5129 MSIRouteEntry *entry;
5130 MSIMessage msg;
5131 PCIDevice *dev;
5132
5133
5134 QLIST_FOREACH(entry, &msi_route_list, list) {
5135 cnt++;
5136 vector = entry->vector;
5137 dev = entry->dev;
5138 if (msix_enabled(dev) && !msix_is_masked(dev, vector)) {
5139 msg = msix_get_message(dev, vector);
5140 } else if (msi_enabled(dev) && !msi_is_masked(dev, vector)) {
5141 msg = msi_get_message(dev, vector);
5142 } else {
5143
5144
5145
5146
5147 continue;
5148 }
5149 kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev);
5150 }
5151 kvm_irqchip_commit_routes(kvm_state);
5152 trace_kvm_x86_update_msi_routes(cnt);
5153}
5154
5155int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
5156 int vector, PCIDevice *dev)
5157{
5158 static bool notify_list_inited = false;
5159 MSIRouteEntry *entry;
5160
5161 if (!dev) {
5162
5163
5164
5165 return 0;
5166 }
5167
5168 entry = g_new0(MSIRouteEntry, 1);
5169 entry->dev = dev;
5170 entry->vector = vector;
5171 entry->virq = route->gsi;
5172 QLIST_INSERT_HEAD(&msi_route_list, entry, list);
5173
5174 trace_kvm_x86_add_msi_route(route->gsi);
5175
5176 if (!notify_list_inited) {
5177
5178
5179 X86IOMMUState *iommu = x86_iommu_get_default();
5180 if (iommu) {
5181 x86_iommu_iec_register_notifier(iommu,
5182 kvm_update_msi_routes_all,
5183 NULL);
5184 }
5185 notify_list_inited = true;
5186 }
5187 return 0;
5188}
5189
5190int kvm_arch_release_virq_post(int virq)
5191{
5192 MSIRouteEntry *entry, *next;
5193 QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) {
5194 if (entry->virq == virq) {
5195 trace_kvm_x86_remove_msi_route(virq);
5196 QLIST_REMOVE(entry, list);
5197 g_free(entry);
5198 break;
5199 }
5200 }
5201 return 0;
5202}
5203
/*
 * This implementation provides no MSI data to GSI translation: any call
 * aborts the process, and @data is never examined.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    abort();
}
5208
5209bool kvm_has_waitpkg(void)
5210{
5211 return has_msr_umwait;
5212}
5213
/*
 * CPUs are reported as resettable unless SEV-ES is enabled.
 */
bool kvm_arch_cpu_check_are_resettable(void)
{
    if (sev_es_enabled()) {
        return false;
    }

    return true;
}
5218
5219#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
5220
5221void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
5222{
5223 KVMState *s = kvm_state;
5224 uint64_t supported;
5225
5226 mask &= XSTATE_DYNAMIC_MASK;
5227 if (!mask) {
5228 return;
5229 }
5230
5231
5232
5233
5234
5235 supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
5236 supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
5237 mask &= supported;
5238
5239 while (mask) {
5240 int bit = ctz64(mask);
5241 int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
5242 if (rc) {
5243
5244
5245
5246
5247
5248 warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
5249 "for feature bit %d", bit);
5250 }
5251 mask &= ~BIT_ULL(bit);
5252 }
5253}
5254