// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * x86-specific code, derived from drivers/kvm/kvm_main.c.
 */
19#include <linux/kvm_host.h>
20#include "irq.h"
21#include "ioapic.h"
22#include "mmu.h"
23#include "i8254.h"
24#include "tss.h"
25#include "kvm_cache_regs.h"
26#include "kvm_emulate.h"
27#include "x86.h"
28#include "cpuid.h"
29#include "pmu.h"
30#include "hyperv.h"
31#include "lapic.h"
32#include "xen.h"
33
34#include <linux/clocksource.h>
35#include <linux/interrupt.h>
36#include <linux/kvm.h>
37#include <linux/fs.h>
38#include <linux/vmalloc.h>
39#include <linux/export.h>
40#include <linux/moduleparam.h>
41#include <linux/mman.h>
42#include <linux/highmem.h>
43#include <linux/iommu.h>
44#include <linux/intel-iommu.h>
45#include <linux/cpufreq.h>
46#include <linux/user-return-notifier.h>
47#include <linux/srcu.h>
48#include <linux/slab.h>
49#include <linux/perf_event.h>
50#include <linux/uaccess.h>
51#include <linux/hash.h>
52#include <linux/pci.h>
53#include <linux/timekeeper_internal.h>
54#include <linux/pvclock_gtod.h>
55#include <linux/kvm_irqfd.h>
56#include <linux/irqbypass.h>
57#include <linux/sched/stat.h>
58#include <linux/sched/isolation.h>
59#include <linux/mem_encrypt.h>
60#include <linux/entry-kvm.h>
61#include <linux/suspend.h>
62
63#include <trace/events/kvm.h>
64
65#include <asm/debugreg.h>
66#include <asm/msr.h>
67#include <asm/desc.h>
68#include <asm/mce.h>
69#include <asm/pkru.h>
70#include <linux/kernel_stat.h>
71#include <asm/fpu/api.h>
72#include <asm/fpu/xcr.h>
73#include <asm/fpu/xstate.h>
74#include <asm/pvclock.h>
75#include <asm/div64.h>
76#include <asm/irq_remapping.h>
77#include <asm/mshyperv.h>
78#include <asm/hypervisor.h>
79#include <asm/tlbflush.h>
80#include <asm/intel_pt.h>
81#include <asm/emulate_prefix.h>
82#include <asm/sgx.h>
83#include <clocksource/hyperv_timer.h>
84
85#define CREATE_TRACE_POINTS
86#include "trace.h"
87
88#define MAX_IO_MSRS 256
89#define KVM_MAX_MCE_BANKS 32
90u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
91EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
92
93#define emul_to_vcpu(ctxt) \
94 ((struct kvm_vcpu *)(ctxt)->vcpu)
95
/*
 * EFER bits the guest is allowed to set: SCE always, plus LME/LMA on 64-bit
 * hosts.  All other bits are treated as reserved.
 */
100#ifdef CONFIG_X86_64
101static
102u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
103#else
104static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
105#endif
106
107static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
108
109#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
110
111#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
112 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
113
114static void update_cr8_intercept(struct kvm_vcpu *vcpu);
115static void process_nmi(struct kvm_vcpu *vcpu);
116static void process_smi(struct kvm_vcpu *vcpu);
117static void enter_smm(struct kvm_vcpu *vcpu);
118static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
119static void store_regs(struct kvm_vcpu *vcpu);
120static int sync_regs(struct kvm_vcpu *vcpu);
121
122static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
123static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
124
125struct kvm_x86_ops kvm_x86_ops __read_mostly;
126EXPORT_SYMBOL_GPL(kvm_x86_ops);
127
128#define KVM_X86_OP(func) \
129 DEFINE_STATIC_CALL_NULL(kvm_x86_##func, \
130 *(((struct kvm_x86_ops *)0)->func));
131#define KVM_X86_OP_NULL KVM_X86_OP
132#include <asm/kvm-x86-ops.h>
133EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
134EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
135EXPORT_STATIC_CALL_GPL(kvm_x86_tlb_flush_current);
136
137static bool __read_mostly ignore_msrs = 0;
138module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
139
140bool __read_mostly report_ignored_msrs = true;
141module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
142EXPORT_SYMBOL_GPL(report_ignored_msrs);
143
144unsigned int min_timer_period_us = 200;
145module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
146
147static bool __read_mostly kvmclock_periodic_sync = true;
148module_param(kvmclock_periodic_sync, bool, S_IRUGO);
149
150bool __read_mostly kvm_has_tsc_control;
151EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
152u32 __read_mostly kvm_max_guest_tsc_khz;
153EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
154u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
155EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
156u64 __read_mostly kvm_max_tsc_scaling_ratio;
157EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
158u64 __read_mostly kvm_default_tsc_scaling_ratio;
159EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
160bool __read_mostly kvm_has_bus_lock_exit;
161EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);
162
/*
 * Allowed deviation, in parts per million, between a requested guest TSC
 * rate and the host rate before TSC scaling or catch-up kicks in.
 */
164static u32 __read_mostly tsc_tolerance_ppm = 250;
165module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
166
/*
 * LAPIC timer advance (tscdeadline mode only), in nanoseconds: deliver the
 * timer interrupt this much early to compensate for injection latency.
 * -1 lets KVM tune the value adaptively.
 */
173static int __read_mostly lapic_timer_advance_ns = -1;
174module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
175
176static bool __read_mostly vector_hashing = true;
177module_param(vector_hashing, bool, S_IRUGO);
178
179bool __read_mostly enable_vmware_backdoor = false;
180module_param(enable_vmware_backdoor, bool, S_IRUGO);
181EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
182
183static bool __read_mostly force_emulation_prefix = false;
184module_param(force_emulation_prefix, bool, S_IRUGO);
185
186int __read_mostly pi_inject_timer = -1;
187module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
188
/*
 * User-return MSRs: MSRs whose guest values are left loaded in hardware
 * across VM-exits and whose host values are restored lazily, from a
 * user-return notifier, the first time the CPU returns to userspace.
 */
194#define KVM_MAX_NR_USER_RETURN_MSRS 16
195
196struct kvm_user_return_msrs {
197 struct user_return_notifier urn;
198 bool registered;
199 struct kvm_user_return_msr_values {
200 u64 host;
201 u64 curr;
202 } values[KVM_MAX_NR_USER_RETURN_MSRS];
203};
204
205u32 __read_mostly kvm_nr_uret_msrs;
206EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
207static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
208static struct kvm_user_return_msrs __percpu *user_return_msrs;
209
210#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
211 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
212 | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
213 | XFEATURE_MASK_PKRU)
214
215u64 __read_mostly host_efer;
216EXPORT_SYMBOL_GPL(host_efer);
217
218bool __read_mostly allow_smaller_maxphyaddr = 0;
219EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
220
221bool __read_mostly enable_apicv = true;
222EXPORT_SYMBOL_GPL(enable_apicv);
223
224u64 __read_mostly host_xss;
225EXPORT_SYMBOL_GPL(host_xss);
226u64 __read_mostly supported_xss;
227EXPORT_SYMBOL_GPL(supported_xss);
228
229const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
230 KVM_GENERIC_VM_STATS(),
231 STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
232 STATS_DESC_COUNTER(VM, mmu_pte_write),
233 STATS_DESC_COUNTER(VM, mmu_pde_zapped),
234 STATS_DESC_COUNTER(VM, mmu_flooded),
235 STATS_DESC_COUNTER(VM, mmu_recycled),
236 STATS_DESC_COUNTER(VM, mmu_cache_miss),
237 STATS_DESC_ICOUNTER(VM, mmu_unsync),
238 STATS_DESC_ICOUNTER(VM, pages_4k),
239 STATS_DESC_ICOUNTER(VM, pages_2m),
240 STATS_DESC_ICOUNTER(VM, pages_1g),
241 STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
242 STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size),
243 STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
244};
245
246const struct kvm_stats_header kvm_vm_stats_header = {
247 .name_size = KVM_STATS_NAME_SIZE,
248 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
249 .id_offset = sizeof(struct kvm_stats_header),
250 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
251 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
252 sizeof(kvm_vm_stats_desc),
253};
254
255const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
256 KVM_GENERIC_VCPU_STATS(),
257 STATS_DESC_COUNTER(VCPU, pf_fixed),
258 STATS_DESC_COUNTER(VCPU, pf_guest),
259 STATS_DESC_COUNTER(VCPU, tlb_flush),
260 STATS_DESC_COUNTER(VCPU, invlpg),
261 STATS_DESC_COUNTER(VCPU, exits),
262 STATS_DESC_COUNTER(VCPU, io_exits),
263 STATS_DESC_COUNTER(VCPU, mmio_exits),
264 STATS_DESC_COUNTER(VCPU, signal_exits),
265 STATS_DESC_COUNTER(VCPU, irq_window_exits),
266 STATS_DESC_COUNTER(VCPU, nmi_window_exits),
267 STATS_DESC_COUNTER(VCPU, l1d_flush),
268 STATS_DESC_COUNTER(VCPU, halt_exits),
269 STATS_DESC_COUNTER(VCPU, request_irq_exits),
270 STATS_DESC_COUNTER(VCPU, irq_exits),
271 STATS_DESC_COUNTER(VCPU, host_state_reload),
272 STATS_DESC_COUNTER(VCPU, fpu_reload),
273 STATS_DESC_COUNTER(VCPU, insn_emulation),
274 STATS_DESC_COUNTER(VCPU, insn_emulation_fail),
275 STATS_DESC_COUNTER(VCPU, hypercalls),
276 STATS_DESC_COUNTER(VCPU, irq_injections),
277 STATS_DESC_COUNTER(VCPU, nmi_injections),
278 STATS_DESC_COUNTER(VCPU, req_event),
279 STATS_DESC_COUNTER(VCPU, nested_run),
280 STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
281 STATS_DESC_COUNTER(VCPU, directed_yield_successful),
282 STATS_DESC_ICOUNTER(VCPU, guest_mode)
283};
284
285const struct kvm_stats_header kvm_vcpu_stats_header = {
286 .name_size = KVM_STATS_NAME_SIZE,
287 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
288 .id_offset = sizeof(struct kvm_stats_header),
289 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
290 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
291 sizeof(kvm_vcpu_stats_desc),
292};
293
294u64 __read_mostly host_xcr0;
295u64 __read_mostly supported_xcr0;
296EXPORT_SYMBOL_GPL(supported_xcr0);
297
298static struct kmem_cache *x86_emulator_cache;
299
/*
 * Handle an access to an MSR that KVM does not implement: with the
 * ignore_msrs module parameter the access is (optionally loudly) ignored,
 * otherwise it is logged and refused.
 */
304static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
305{
306 const char *op = write ? "wrmsr" : "rdmsr";
307
308 if (ignore_msrs) {
309 if (report_ignored_msrs)
310 kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
311 op, msr, data);
312
313 return true;
314 } else {
315 kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
316 op, msr, data);
317 return false;
318 }
319}
320
321static struct kmem_cache *kvm_alloc_emulator_cache(void)
322{
323 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
324 unsigned int size = sizeof(struct x86_emulate_ctxt);
325
326 return kmem_cache_create_usercopy("x86_emulator", size,
327 __alignof__(struct x86_emulate_ctxt),
328 SLAB_ACCOUNT, useroffset,
329 size - useroffset, NULL);
330}
331
332static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
333
334static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
335{
336 int i;
337 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
338 vcpu->arch.apf.gfns[i] = ~0;
339}
340
341static void kvm_on_user_return(struct user_return_notifier *urn)
342{
343 unsigned slot;
344 struct kvm_user_return_msrs *msrs
345 = container_of(urn, struct kvm_user_return_msrs, urn);
346 struct kvm_user_return_msr_values *values;
347 unsigned long flags;
348
	/*
	 * Disable IRQs while unregistering: this path can also be reached,
	 * with the same per-CPU data, from the hardware-disable path, so the
	 * unregister must not be interrupted and run twice.
	 */
353 local_irq_save(flags);
354 if (msrs->registered) {
355 msrs->registered = false;
356 user_return_notifier_unregister(urn);
357 }
358 local_irq_restore(flags);
359 for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
360 values = &msrs->values[slot];
361 if (values->host != values->curr) {
362 wrmsrl(kvm_uret_msrs_list[slot], values->host);
363 values->curr = values->host;
364 }
365 }
366}
367
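/*
 * Probe whether an MSR can be safely read and written back on this CPU;
 * a faulting RDMSR/WRMSR means the MSR cannot be used as a user-return MSR.
 */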
368static int kvm_probe_user_return_msr(u32 msr)
369{
370 u64 val;
371 int ret;
372
373 preempt_disable();
374 ret = rdmsrl_safe(msr, &val);
375 if (ret)
376 goto out;
377 ret = wrmsrl_safe(msr, val);
378out:
379 preempt_enable();
380 return ret;
381}
382
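/*
 * Register an MSR whose host value must be restored when the CPU returns to
 * userspace; returns the slot index, or -1 if the MSR is not usable on this
 * CPU.
 */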
383int kvm_add_user_return_msr(u32 msr)
384{
385 BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
386
387 if (kvm_probe_user_return_msr(msr))
388 return -1;
389
390 kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
391 return kvm_nr_uret_msrs++;
392}
393EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
394
395int kvm_find_user_return_msr(u32 msr)
396{
397 int i;
398
399 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
400 if (kvm_uret_msrs_list[i] == msr)
401 return i;
402 }
403 return -1;
404}
405EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
406
407static void kvm_user_return_msr_cpu_online(void)
408{
409 unsigned int cpu = smp_processor_id();
410 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
411 u64 value;
412 int i;
413
414 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
415 rdmsrl_safe(kvm_uret_msrs_list[i], &value);
416 msrs->values[i].host = value;
417 msrs->values[i].curr = value;
418 }
419}
420
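/*
 * Load a guest value into a user-return MSR slot.  Only the bits selected by
 * @mask come from @value; the remaining bits keep the host value.  The write
 * is skipped if the MSR already holds the requested value, and the
 * user-return notifier is registered on first use so the host value is
 * restored when this CPU returns to userspace.
 */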
421int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
422{
423 unsigned int cpu = smp_processor_id();
424 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
425 int err;
426
427 value = (value & mask) | (msrs->values[slot].host & ~mask);
428 if (value == msrs->values[slot].curr)
429 return 0;
430 err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
431 if (err)
432 return 1;
433
434 msrs->values[slot].curr = value;
435 if (!msrs->registered) {
436 msrs->urn.on_user_return = kvm_on_user_return;
437 user_return_notifier_register(&msrs->urn);
438 msrs->registered = true;
439 }
440 return 0;
441}
442EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
443
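/* Restore host MSR values and unregister the notifier on this CPU, if needed. */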
444static void drop_user_return_notifiers(void)
445{
446 unsigned int cpu = smp_processor_id();
447 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
448
449 if (msrs->registered)
450 kvm_on_user_return(&msrs->urn);
451}
452
453u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
454{
455 return vcpu->arch.apic_base;
456}
457EXPORT_SYMBOL_GPL(kvm_get_apic_base);
458
459enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
460{
461 return kvm_apic_mode(kvm_get_apic_base(vcpu));
462}
463EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
464
465int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
466{
467 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
468 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
469 u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
470 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
471
472 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
473 return 1;
474 if (!msr_info->host_initiated) {
475 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
476 return 1;
477 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
478 return 1;
479 }
480
481 kvm_lapic_set_base(vcpu, msr_info->data);
482 kvm_recalculate_apic_map(vcpu->kvm);
483 return 0;
484}
485EXPORT_SYMBOL_GPL(kvm_set_apic_base);
486
/*
 * Hardware virtualization extension instructions may fault if a reboot turns
 * off virtualization while vCPU tasks are still running.  Usually such a
 * fault is fatal; during reboot it is expected and simply ignored.
 */
494noinstr void kvm_spurious_fault(void)
495{
	/* Fault while not rebooting.  We want the trace. */
497 BUG_ON(!kvm_rebooting);
498}
499EXPORT_SYMBOL_GPL(kvm_spurious_fault);
500
501#define EXCPT_BENIGN 0
502#define EXCPT_CONTRIBUTORY 1
503#define EXCPT_PF 2
504
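/*
 * Classify an exception vector for double-fault purposes: page fault,
 * contributory (#DE/#TS/#NP/#SS/#GP) or benign.
 */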
505static int exception_class(int vector)
506{
507 switch (vector) {
508 case PF_VECTOR:
509 return EXCPT_PF;
510 case DE_VECTOR:
511 case TS_VECTOR:
512 case NP_VECTOR:
513 case SS_VECTOR:
514 case GP_VECTOR:
515 return EXCPT_CONTRIBUTORY;
516 default:
517 break;
518 }
519 return EXCPT_BENIGN;
520}
521
522#define EXCPT_FAULT 0
523#define EXCPT_TRAP 1
524#define EXCPT_ABORT 2
525#define EXCPT_INTERRUPT 3
526
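/*
 * Classify a vector as fault, trap or abort: #DB/#BP/#OF are traps,
 * #DF/#MC are aborts, everything else in 0-31 is a fault; out-of-range
 * vectors and NMI are flagged and treated as interrupts.
 */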
527static int exception_type(int vector)
528{
529 unsigned int mask;
530
531 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
532 return EXCPT_INTERRUPT;
533
534 mask = 1 << vector;
535
536
537 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
538 return EXCPT_TRAP;
539
540 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
541 return EXCPT_ABORT;
542
543
544 return EXCPT_FAULT;
545}
546
547void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
548{
549 unsigned nr = vcpu->arch.exception.nr;
550 bool has_payload = vcpu->arch.exception.has_payload;
551 unsigned long payload = vcpu->arch.exception.payload;
552
553 if (!has_payload)
554 return;
555
556 switch (nr) {
557 case DB_VECTOR:
		/*
		 * Merge the #DB payload into DR6 instead of overwriting it;
		 * start by clearing only the breakpoint-condition bits B0-B3.
		 */
563 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
		/*
		 * The #DB payload uses the same encoding as the VMX "pending
		 * debug exceptions" field, in which the architecturally
		 * active-low DR6 bits appear inverted.  OR the payload on top
		 * of DR6_ACTIVE_LOW and XOR the active-low bits back so that
		 * vcpu->arch.dr6 holds the architectural DR6 value.
		 */
580 vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
581 vcpu->arch.dr6 |= payload;
582 vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
		/*
		 * Bit 12 is defined in the "pending debug exceptions" field
		 * (enabled breakpoint) but not in DR6, so clear it.
		 */
590 vcpu->arch.dr6 &= ~BIT(12);
591 break;
592 case PF_VECTOR:
593 vcpu->arch.cr2 = payload;
594 break;
595 }
596
597 vcpu->arch.exception.has_payload = false;
598 vcpu->arch.exception.payload = 0;
599}
600EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
601
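/*
 * Queue an exception, merging it with any exception that is already pending:
 * a second contributory exception (or any non-benign exception on top of a
 * page fault) is promoted to #DF, and an exception on top of a pending #DF
 * escalates to a triple fault / shutdown.
 */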
602static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
603 unsigned nr, bool has_error, u32 error_code,
604 bool has_payload, unsigned long payload, bool reinject)
605{
606 u32 prev_nr;
607 int class1, class2;
608
609 kvm_make_request(KVM_REQ_EVENT, vcpu);
610
611 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
612 queue:
613 if (reinject) {
		/*
		 * On VM-Entry an exception can be pending only if injection
		 * was blocked by nested_run_pending; in that case KVM asks
		 * for an immediate exit, so the guest should not get far
		 * enough to need reinjection of a second event.
		 */
622 WARN_ON_ONCE(vcpu->arch.exception.pending);
623 vcpu->arch.exception.injected = true;
624 if (WARN_ON_ONCE(has_payload)) {
			/*
			 * A reinjected exception has already delivered its
			 * payload, so there is nothing to carry over.
			 */
629 has_payload = false;
630 payload = 0;
631 }
632 } else {
633 vcpu->arch.exception.pending = true;
634 vcpu->arch.exception.injected = false;
635 }
636 vcpu->arch.exception.has_error_code = has_error;
637 vcpu->arch.exception.nr = nr;
638 vcpu->arch.exception.error_code = error_code;
639 vcpu->arch.exception.has_payload = has_payload;
640 vcpu->arch.exception.payload = payload;
641 if (!is_guest_mode(vcpu))
642 kvm_deliver_exception_payload(vcpu);
643 return;
644 }
645
646
647 prev_nr = vcpu->arch.exception.nr;
648 if (prev_nr == DF_VECTOR) {
		/* A new exception on top of #DF: triple fault -> shutdown. */
650 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
651 return;
652 }
653 class1 = exception_class(prev_nr);
654 class2 = exception_class(nr);
655 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
656 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/*
		 * Two contributory exceptions, or any non-benign exception
		 * following a page fault: deliver a double fault instead.
		 */
662 vcpu->arch.exception.pending = true;
663 vcpu->arch.exception.injected = false;
664 vcpu->arch.exception.has_error_code = true;
665 vcpu->arch.exception.nr = DF_VECTOR;
666 vcpu->arch.exception.error_code = 0;
667 vcpu->arch.exception.has_payload = false;
668 vcpu->arch.exception.payload = 0;
669 } else
		/*
		 * Otherwise drop the previously pending exception and queue
		 * the new one, in the hope that re-executing the instruction
		 * will regenerate the lost exception.
		 */
673 goto queue;
674}
675
676void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
677{
678 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
679}
680EXPORT_SYMBOL_GPL(kvm_queue_exception);
681
682void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
683{
684 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
685}
686EXPORT_SYMBOL_GPL(kvm_requeue_exception);
687
688void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
689 unsigned long payload)
690{
691 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
692}
693EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
694
695static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
696 u32 error_code, unsigned long payload)
697{
698 kvm_multiple_exception(vcpu, nr, true, error_code,
699 true, payload, false);
700}
701
702int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
703{
704 if (err)
705 kvm_inject_gp(vcpu, 0);
706 else
707 return kvm_skip_emulated_instruction(vcpu);
708
709 return 1;
710}
711EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
712
713void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
714{
715 ++vcpu->stat.pf_guest;
716 vcpu->arch.exception.nested_apf =
717 is_guest_mode(vcpu) && fault->async_page_fault;
718 if (vcpu->arch.exception.nested_apf) {
719 vcpu->arch.apf.nested_apf_token = fault->address;
720 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
721 } else {
722 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
723 fault->address);
724 }
725}
726EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
727
728bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
729 struct x86_exception *fault)
730{
731 struct kvm_mmu *fault_mmu;
732 WARN_ON_ONCE(fault->vector != PF_VECTOR);
733
734 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
735 vcpu->arch.walk_mmu;
736
737
738
739
740
741 if ((fault->error_code & PFERR_PRESENT_MASK) &&
742 !(fault->error_code & PFERR_RSVD_MASK))
743 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
744 fault_mmu->root_hpa);
745
746 fault_mmu->inject_page_fault(vcpu, fault);
747 return fault->nested_page_fault;
748}
749EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
750
751void kvm_inject_nmi(struct kvm_vcpu *vcpu)
752{
753 atomic_inc(&vcpu->arch.nmi_queued);
754 kvm_make_request(KVM_REQ_NMI, vcpu);
755}
756EXPORT_SYMBOL_GPL(kvm_inject_nmi);
757
758void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
759{
760 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
761}
762EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
763
764void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
765{
766 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
767}
768EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
769
770
771
772
773
774bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
775{
776 if (static_call(kvm_x86_get_cpl)(vcpu) <= required_cpl)
777 return true;
778 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
779 return false;
780}
781EXPORT_SYMBOL_GPL(kvm_require_cpl);
782
783bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
784{
785 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
786 return true;
787
788 kvm_queue_exception(vcpu, UD_VECTOR);
789 return false;
790}
791EXPORT_SYMBOL_GPL(kvm_require_dr);
792
793static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
794{
795 return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
796}
797
798
799
800
801int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
802{
803 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
804 gpa_t real_gpa;
805 int i;
806 int ret;
807 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
808
809
810
811
812
813 real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(pdpt_gfn),
814 PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
815 if (real_gpa == UNMAPPED_GVA)
816 return 0;
817
818
819 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(real_gpa), pdpte,
820 cr3 & GENMASK(11, 5), sizeof(pdpte));
821 if (ret < 0)
822 return 0;
823
824 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
825 if ((pdpte[i] & PT_PRESENT_MASK) &&
826 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
827 return 0;
828 }
829 }
830
831 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
832 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
833 vcpu->arch.pdptrs_from_userspace = false;
834
835 return 1;
836}
837EXPORT_SYMBOL_GPL(load_pdptrs);
838
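/*
 * Perform the side effects of a CR0 change after the new value has been
 * committed: reset async-PF state when paging is toggled, rebuild the MMU
 * context when role bits change, and zap cached mappings when CR0.CD is
 * toggled for a VM with non-coherent DMA (unless the CD/NW quirk applies).
 */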
839void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
840{
841 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
842 kvm_clear_async_pf_completion_queue(vcpu);
843 kvm_async_pf_hash_reset(vcpu);
844 }
845
846 if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
847 kvm_mmu_reset_context(vcpu);
848
849 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
850 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
851 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
852 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
853}
854EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
855
856int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
857{
858 unsigned long old_cr0 = kvm_read_cr0(vcpu);
859 unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
860
861 cr0 |= X86_CR0_ET;
862
863#ifdef CONFIG_X86_64
864 if (cr0 & 0xffffffff00000000UL)
865 return 1;
866#endif
867
868 cr0 &= ~CR0_RESERVED_BITS;
869
870 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
871 return 1;
872
873 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
874 return 1;
875
876#ifdef CONFIG_X86_64
877 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
878 (cr0 & X86_CR0_PG)) {
879 int cs_db, cs_l;
880
881 if (!is_pae(vcpu))
882 return 1;
883 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
884 if (cs_l)
885 return 1;
886 }
887#endif
888 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
889 is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
890 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
891 return 1;
892
893 if (!(cr0 & X86_CR0_PG) &&
894 (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
895 return 1;
896
897 static_call(kvm_x86_set_cr0)(vcpu, cr0);
898
899 kvm_post_set_cr0(vcpu, old_cr0, cr0);
900
901 return 0;
902}
903EXPORT_SYMBOL_GPL(kvm_set_cr0);
904
905void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
906{
907 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
908}
909EXPORT_SYMBOL_GPL(kvm_lmsw);
910
911void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
912{
913 if (vcpu->arch.guest_state_protected)
914 return;
915
916 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
917
918 if (vcpu->arch.xcr0 != host_xcr0)
919 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
920
921 if (vcpu->arch.xsaves_enabled &&
922 vcpu->arch.ia32_xss != host_xss)
923 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
924 }
925
926 if (static_cpu_has(X86_FEATURE_PKU) &&
927 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
928 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
929 vcpu->arch.pkru != vcpu->arch.host_pkru)
930 write_pkru(vcpu->arch.pkru);
931}
932EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
933
934void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
935{
936 if (vcpu->arch.guest_state_protected)
937 return;
938
939 if (static_cpu_has(X86_FEATURE_PKU) &&
940 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
941 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
942 vcpu->arch.pkru = rdpkru();
943 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
944 write_pkru(vcpu->arch.host_pkru);
945 }
946
947 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
948
949 if (vcpu->arch.xcr0 != host_xcr0)
950 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
951
952 if (vcpu->arch.xsaves_enabled &&
953 vcpu->arch.ia32_xss != host_xss)
954 wrmsrl(MSR_IA32_XSS, host_xss);
955 }
956
957}
958EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
959
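/*
 * Validate and install a guest XCR0: FP must always be set, YMM requires
 * SSE, BNDREGS and BNDCSR must match, the AVX-512 components must be set as
 * a group, and only features in guest_supported_xcr0 (plus FP) are allowed.
 * A change to the extended features triggers a CPUID runtime update.
 */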
960static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
961{
962 u64 xcr0 = xcr;
963 u64 old_xcr0 = vcpu->arch.xcr0;
964 u64 valid_bits;
965
966
967 if (index != XCR_XFEATURE_ENABLED_MASK)
968 return 1;
969 if (!(xcr0 & XFEATURE_MASK_FP))
970 return 1;
971 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
972 return 1;
973
974
975
976
977
978
979 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
980 if (xcr0 & ~valid_bits)
981 return 1;
982
983 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
984 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
985 return 1;
986
987 if (xcr0 & XFEATURE_MASK_AVX512) {
988 if (!(xcr0 & XFEATURE_MASK_YMM))
989 return 1;
990 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
991 return 1;
992 }
993 vcpu->arch.xcr0 = xcr0;
994
995 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
996 kvm_update_cpuid_runtime(vcpu);
997 return 0;
998}
999
1000int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
1001{
1002 if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
1003 __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
1004 kvm_inject_gp(vcpu, 0);
1005 return 1;
1006 }
1007
1008 return kvm_skip_emulated_instruction(vcpu);
1009}
1010EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
1011
1012bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1013{
1014 if (cr4 & cr4_reserved_bits)
1015 return false;
1016
1017 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
1018 return false;
1019
1020 return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
1021}
1022EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
1023
1024void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
1025{
	/*
	 * A CR4 change that touches MMU role bits requires a full MMU context
	 * reset.  Toggling CR4.PCIDE alone only requires dropping the cached
	 * roots (MMU reload), and toggling CR4.PGE only requires a guest TLB
	 * flush because global translations may have become stale.
	 */
1042 if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS)
1043 kvm_mmu_reset_context(vcpu);
1044 else if ((cr4 ^ old_cr4) & X86_CR4_PCIDE)
1045 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
1046 else if ((cr4 ^ old_cr4) & X86_CR4_PGE)
1047 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1048}
1049EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
1050
1051int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1052{
1053 unsigned long old_cr4 = kvm_read_cr4(vcpu);
1054 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
1055 X86_CR4_SMEP;
1056
1057 if (!kvm_is_valid_cr4(vcpu, cr4))
1058 return 1;
1059
1060 if (is_long_mode(vcpu)) {
1061 if (!(cr4 & X86_CR4_PAE))
1062 return 1;
1063 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
1064 return 1;
1065 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
1066 && ((cr4 ^ old_cr4) & pdptr_bits)
1067 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
1068 kvm_read_cr3(vcpu)))
1069 return 1;
1070
1071 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
1072 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
1073 return 1;
1074
1075
1076 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1077 return 1;
1078 }
1079
1080 static_call(kvm_x86_set_cr4)(vcpu, cr4);
1081
1082 kvm_post_set_cr4(vcpu, old_cr4, cr4);
1083
1084 return 0;
1085}
1086EXPORT_SYMBOL_GPL(kvm_set_cr4);
1087
1088static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
1089{
1090 struct kvm_mmu *mmu = vcpu->arch.mmu;
1091 unsigned long roots_to_free = 0;
1092 int i;
1093
	/*
	 * With TDP enabled KVM's own page tables are not tagged by guest
	 * PCID, so there are no cached roots to free; flushing the guest's
	 * TLB entries is all that is required.
	 */
1101 if (unlikely(tdp_enabled)) {
1102 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1103 return;
1104 }
1105
1106
1107
1108
1109
1110
1111 if (kvm_get_active_pcid(vcpu) == pcid) {
1112 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
1113 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1114 }
1115
1116
1117
1118
1119
1120
1121 if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
1122 return;
1123
1124 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
1125 if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
1126 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
1127
1128 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
1129}
1130
1131int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1132{
1133 bool skip_tlb_flush = false;
1134 unsigned long pcid = 0;
1135#ifdef CONFIG_X86_64
1136 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1137
1138 if (pcid_enabled) {
1139 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1140 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1141 pcid = cr3 & X86_CR3_PCID_MASK;
1142 }
1143#endif
1144
1145
1146 if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
1147 goto handle_tlb_flush;
1148
1149
1150
1151
1152
1153
1154 if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
1155 return 1;
1156
1157 if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
1158 return 1;
1159
1160 if (cr3 != kvm_read_cr3(vcpu))
1161 kvm_mmu_new_pgd(vcpu, cr3);
1162
1163 vcpu->arch.cr3 = cr3;
1164 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1165
1166handle_tlb_flush:
1167
1168
1169
1170
1171
1172
1173
1174 if (!skip_tlb_flush)
1175 kvm_invalidate_pcid(vcpu, pcid);
1176
1177 return 0;
1178}
1179EXPORT_SYMBOL_GPL(kvm_set_cr3);
1180
1181int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1182{
1183 if (cr8 & CR8_RESERVED_BITS)
1184 return 1;
1185 if (lapic_in_kernel(vcpu))
1186 kvm_lapic_set_tpr(vcpu, cr8);
1187 else
1188 vcpu->arch.cr8 = cr8;
1189 return 0;
1190}
1191EXPORT_SYMBOL_GPL(kvm_set_cr8);
1192
1193unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1194{
1195 if (lapic_in_kernel(vcpu))
1196 return kvm_lapic_get_cr8(vcpu);
1197 else
1198 return vcpu->arch.cr8;
1199}
1200EXPORT_SYMBOL_GPL(kvm_get_cr8);
1201
1202static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1203{
1204 int i;
1205
1206 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1207 for (i = 0; i < KVM_NR_DB_REGS; i++)
1208 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1209 }
1210}
1211
1212void kvm_update_dr7(struct kvm_vcpu *vcpu)
1213{
1214 unsigned long dr7;
1215
1216 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1217 dr7 = vcpu->arch.guest_debug_dr7;
1218 else
1219 dr7 = vcpu->arch.dr7;
1220 static_call(kvm_x86_set_dr7)(vcpu, dr7);
1221 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1222 if (dr7 & DR7_BP_EN_MASK)
1223 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1224}
1225EXPORT_SYMBOL_GPL(kvm_update_dr7);
1226
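/*
 * Bits that always read as 1 in DR6 for this guest: the architectural
 * DR6_FIXED_1 bits plus the active-low RTM and BUS_LOCK bits when the
 * corresponding features are not exposed to the guest.
 */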
1227static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1228{
1229 u64 fixed = DR6_FIXED_1;
1230
1231 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1232 fixed |= DR6_RTM;
1233
1234 if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
1235 fixed |= DR6_BUS_LOCK;
1236 return fixed;
1237}
1238
1239int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1240{
1241 size_t size = ARRAY_SIZE(vcpu->arch.db);
1242
1243 switch (dr) {
1244 case 0 ... 3:
1245 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1246 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1247 vcpu->arch.eff_db[dr] = val;
1248 break;
1249 case 4:
1250 case 6:
1251 if (!kvm_dr6_valid(val))
1252 return 1;
1253 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1254 break;
1255 case 5:
1256 default:
1257 if (!kvm_dr7_valid(val))
1258 return 1;
1259 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1260 kvm_update_dr7(vcpu);
1261 break;
1262 }
1263
1264 return 0;
1265}
1266EXPORT_SYMBOL_GPL(kvm_set_dr);
1267
1268void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1269{
1270 size_t size = ARRAY_SIZE(vcpu->arch.db);
1271
1272 switch (dr) {
1273 case 0 ... 3:
1274 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1275 break;
1276 case 4:
1277 case 6:
1278 *val = vcpu->arch.dr6;
1279 break;
1280 case 5:
1281 default:
1282 *val = vcpu->arch.dr7;
1283 break;
1284 }
1285}
1286EXPORT_SYMBOL_GPL(kvm_get_dr);
1287
1288int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
1289{
1290 u32 ecx = kvm_rcx_read(vcpu);
1291 u64 data;
1292
1293 if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
1294 kvm_inject_gp(vcpu, 0);
1295 return 1;
1296 }
1297
1298 kvm_rax_write(vcpu, (u32)data);
1299 kvm_rdx_write(vcpu, data >> 32);
1300 return kvm_skip_emulated_instruction(vcpu);
1301}
1302EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
1303
/*
 * The three MSR lists below serve different purposes:
 *
 * - msrs_to_save: MSRs with state that lives in hardware and is exposed to
 *   userspace via KVM_GET/SET_MSRS and KVM_GET_MSR_INDEX_LIST.
 * - emulated_msrs: MSRs that KVM emulates entirely in software.
 * - msr_based_features: read-only MSRs describing host/KVM capabilities.
 *
 * msrs_to_save_all is the superset; it is filtered at module load time into
 * msrs_to_save according to what the host CPU actually supports.
 */
1316static const u32 msrs_to_save_all[] = {
1317 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1318 MSR_STAR,
1319#ifdef CONFIG_X86_64
1320 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1321#endif
1322 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1323 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1324 MSR_IA32_SPEC_CTRL,
1325 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1326 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1327 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1328 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1329 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1330 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1331 MSR_IA32_UMWAIT_CONTROL,
1332
1333 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1334 MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
1335 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1336 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1337 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1338 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1339 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1340 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1341 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1342 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1343 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1344 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1345 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1346 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1347 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1348 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1349 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1350 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1351 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1352 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1353 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1354 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1355
1356 MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
1357 MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
1358 MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
1359 MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
1360 MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
1361 MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
1362};
1363
1364static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1365static unsigned num_msrs_to_save;
1366
1367static const u32 emulated_msrs_all[] = {
1368 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1369 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1370 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1371 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1372 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1373 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1374 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1375 HV_X64_MSR_RESET,
1376 HV_X64_MSR_VP_INDEX,
1377 HV_X64_MSR_VP_RUNTIME,
1378 HV_X64_MSR_SCONTROL,
1379 HV_X64_MSR_STIMER0_CONFIG,
1380 HV_X64_MSR_VP_ASSIST_PAGE,
1381 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1382 HV_X64_MSR_TSC_EMULATION_STATUS,
1383 HV_X64_MSR_SYNDBG_OPTIONS,
1384 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1385 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1386 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1387
1388 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1389 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1390
1391 MSR_IA32_TSC_ADJUST,
1392 MSR_IA32_TSC_DEADLINE,
1393 MSR_IA32_ARCH_CAPABILITIES,
1394 MSR_IA32_PERF_CAPABILITIES,
1395 MSR_IA32_MISC_ENABLE,
1396 MSR_IA32_MCG_STATUS,
1397 MSR_IA32_MCG_CTL,
1398 MSR_IA32_MCG_EXT_CTL,
1399 MSR_IA32_SMBASE,
1400 MSR_SMI_COUNT,
1401 MSR_PLATFORM_INFO,
1402 MSR_MISC_FEATURES_ENABLES,
1403 MSR_AMD64_VIRT_SPEC_CTRL,
1404 MSR_AMD64_TSC_RATIO,
1405 MSR_IA32_POWER_CTL,
1406 MSR_IA32_UCODE_REV,
1407
1415 MSR_IA32_VMX_BASIC,
1416 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1417 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1418 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1419 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1420 MSR_IA32_VMX_MISC,
1421 MSR_IA32_VMX_CR0_FIXED0,
1422 MSR_IA32_VMX_CR4_FIXED0,
1423 MSR_IA32_VMX_VMCS_ENUM,
1424 MSR_IA32_VMX_PROCBASED_CTLS2,
1425 MSR_IA32_VMX_EPT_VPID_CAP,
1426 MSR_IA32_VMX_VMFUNC,
1427
1428 MSR_K7_HWCR,
1429 MSR_KVM_POLL_CONTROL,
1430};
1431
1432static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1433static unsigned num_emulated_msrs;
1434
/*
 * MSR-based features: values that describe host/KVM capabilities rather than
 * per-vCPU state, exposed so userspace can query what can be virtualized.
 */
1439static const u32 msr_based_features_all[] = {
1440 MSR_IA32_VMX_BASIC,
1441 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1442 MSR_IA32_VMX_PINBASED_CTLS,
1443 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1444 MSR_IA32_VMX_PROCBASED_CTLS,
1445 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1446 MSR_IA32_VMX_EXIT_CTLS,
1447 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1448 MSR_IA32_VMX_ENTRY_CTLS,
1449 MSR_IA32_VMX_MISC,
1450 MSR_IA32_VMX_CR0_FIXED0,
1451 MSR_IA32_VMX_CR0_FIXED1,
1452 MSR_IA32_VMX_CR4_FIXED0,
1453 MSR_IA32_VMX_CR4_FIXED1,
1454 MSR_IA32_VMX_VMCS_ENUM,
1455 MSR_IA32_VMX_PROCBASED_CTLS2,
1456 MSR_IA32_VMX_EPT_VPID_CAP,
1457 MSR_IA32_VMX_VMFUNC,
1458
1459 MSR_F10H_DECFG,
1460 MSR_IA32_UCODE_REV,
1461 MSR_IA32_ARCH_CAPABILITIES,
1462 MSR_IA32_PERF_CAPABILITIES,
1463};
1464
1465static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1466static unsigned int num_msr_based_features;
1467
1468static u64 kvm_get_arch_capabilities(void)
1469{
1470 u64 data = 0;
1471
1472 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1473 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1474
1475
1476
1477
1478
1479
1480
1481 data |= ARCH_CAP_PSCHANGE_MC_NO;
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1493 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1494
1495 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1496 data |= ARCH_CAP_RDCL_NO;
1497 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1498 data |= ARCH_CAP_SSB_NO;
1499 if (!boot_cpu_has_bug(X86_BUG_MDS))
1500 data |= ARCH_CAP_MDS_NO;
1501
1502 if (!boot_cpu_has(X86_FEATURE_RTM)) {
1503
1504
1505
1506
1507
1508
1509
1510 data &= ~ARCH_CAP_TAA_NO;
1511 } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
1512 data |= ARCH_CAP_TAA_NO;
1513 } else {
1514
1515
1516
1517
1518
1519 }
1520
1521 return data;
1522}
1523
1524static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1525{
1526 switch (msr->index) {
1527 case MSR_IA32_ARCH_CAPABILITIES:
1528 msr->data = kvm_get_arch_capabilities();
1529 break;
1530 case MSR_IA32_UCODE_REV:
1531 rdmsrl_safe(msr->index, &msr->data);
1532 break;
1533 default:
1534 return static_call(kvm_x86_get_msr_feature)(msr);
1535 }
1536 return 0;
1537}
1538
1539static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1540{
1541 struct kvm_msr_entry msr;
1542 int r;
1543
1544 msr.index = index;
1545 r = kvm_get_msr_feature(&msr);
1546
1547 if (r == KVM_MSR_RET_INVALID) {
1548
1549 *data = 0;
1550 if (kvm_msr_ignored_check(index, 0, false))
1551 r = 0;
1552 }
1553
1554 if (r)
1555 return r;
1556
1557 *data = msr.data;
1558
1559 return 0;
1560}
1561
1562static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1563{
1564 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1565 return false;
1566
1567 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1568 return false;
1569
1570 if (efer & (EFER_LME | EFER_LMA) &&
1571 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1572 return false;
1573
1574 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1575 return false;
1576
1577 return true;
1578
1579}
1580bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1581{
1582 if (efer & efer_reserved_bits)
1583 return false;
1584
1585 return __kvm_valid_efer(vcpu, efer);
1586}
1587EXPORT_SYMBOL_GPL(kvm_valid_efer);
1588
1589static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1590{
1591 u64 old_efer = vcpu->arch.efer;
1592 u64 efer = msr_info->data;
1593 int r;
1594
1595 if (efer & efer_reserved_bits)
1596 return 1;
1597
1598 if (!msr_info->host_initiated) {
1599 if (!__kvm_valid_efer(vcpu, efer))
1600 return 1;
1601
1602 if (is_paging(vcpu) &&
1603 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1604 return 1;
1605 }
1606
1607 efer &= ~EFER_LMA;
1608 efer |= vcpu->arch.efer & EFER_LMA;
1609
1610 r = static_call(kvm_x86_set_efer)(vcpu, efer);
1611 if (r) {
1612 WARN_ON(r > 0);
1613 return r;
1614 }
1615
1616
1617 if ((efer ^ old_efer) & EFER_NX)
1618 kvm_mmu_reset_context(vcpu);
1619
1620 return 0;
1621}
1622
1623void kvm_enable_efer_bits(u64 mask)
1624{
1625 efer_reserved_bits &= ~mask;
1626}
1627EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1628
1629bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
1630{
1631 struct kvm_x86_msr_filter *msr_filter;
1632 struct msr_bitmap_range *ranges;
1633 struct kvm *kvm = vcpu->kvm;
1634 bool allowed;
1635 int idx;
1636 u32 i;
1637
1638
1639 if (index >= 0x800 && index <= 0x8ff)
1640 return true;
1641
1642 idx = srcu_read_lock(&kvm->srcu);
1643
1644 msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
1645 if (!msr_filter) {
1646 allowed = true;
1647 goto out;
1648 }
1649
1650 allowed = msr_filter->default_allow;
1651 ranges = msr_filter->ranges;
1652
1653 for (i = 0; i < msr_filter->count; i++) {
1654 u32 start = ranges[i].base;
1655 u32 end = start + ranges[i].nmsrs;
1656 u32 flags = ranges[i].flags;
1657 unsigned long *bitmap = ranges[i].bitmap;
1658
1659 if ((index >= start) && (index < end) && (flags & type)) {
1660 allowed = !!test_bit(index - start, bitmap);
1661 break;
1662 }
1663 }
1664
1665out:
1666 srcu_read_unlock(&kvm->srcu, idx);
1667
1668 return allowed;
1669}
1670EXPORT_SYMBOL_GPL(kvm_msr_allowed);
1671
1672
1673
1674
1675
1676
1677
1678static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1679 bool host_initiated)
1680{
1681 struct msr_data msr;
1682
1683 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
1684 return KVM_MSR_RET_FILTERED;
1685
1686 switch (index) {
1687 case MSR_FS_BASE:
1688 case MSR_GS_BASE:
1689 case MSR_KERNEL_GS_BASE:
1690 case MSR_CSTAR:
1691 case MSR_LSTAR:
1692 if (is_noncanonical_address(data, vcpu))
1693 return 1;
1694 break;
1695 case MSR_IA32_SYSENTER_EIP:
1696 case MSR_IA32_SYSENTER_ESP:
		/*
		 * Non-canonical values written to the SYSENTER MSRs are not
		 * rejected; the value is canonicalized (sign-extended) before
		 * being handed to vendor code.
		 */
1709 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
1710 break;
1711 case MSR_TSC_AUX:
1712 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1713 return 1;
1714
1715 if (!host_initiated &&
1716 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1717 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1718 return 1;
1719
		/*
		 * Bits 63:32 of TSC_AUX are reserved on Intel (writes #GP) but
		 * ignored on AMD, so reject them for Intel guests and truncate
		 * the value before passing it to vendor code.
		 */
1729 if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
1730 return 1;
1731
1732 data = (u32)data;
1733 break;
1734 }
1735
1736 msr.data = data;
1737 msr.index = index;
1738 msr.host_initiated = host_initiated;
1739
1740 return static_call(kvm_x86_set_msr)(vcpu, &msr);
1741}
1742
1743static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1744 u32 index, u64 data, bool host_initiated)
1745{
1746 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1747
1748 if (ret == KVM_MSR_RET_INVALID)
1749 if (kvm_msr_ignored_check(index, data, true))
1750 ret = 0;
1751
1752 return ret;
1753}
1754
1755
1756
1757
1758
1759
1760
1761int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1762 bool host_initiated)
1763{
1764 struct msr_data msr;
1765 int ret;
1766
1767 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
1768 return KVM_MSR_RET_FILTERED;
1769
1770 switch (index) {
1771 case MSR_TSC_AUX:
1772 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1773 return 1;
1774
1775 if (!host_initiated &&
1776 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1777 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1778 return 1;
1779 break;
1780 }
1781
1782 msr.index = index;
1783 msr.host_initiated = host_initiated;
1784
1785 ret = static_call(kvm_x86_get_msr)(vcpu, &msr);
1786 if (!ret)
1787 *data = msr.data;
1788 return ret;
1789}
1790
1791static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1792 u32 index, u64 *data, bool host_initiated)
1793{
1794 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1795
1796 if (ret == KVM_MSR_RET_INVALID) {
1797
1798 *data = 0;
1799 if (kvm_msr_ignored_check(index, 0, false))
1800 ret = 0;
1801 }
1802
1803 return ret;
1804}
1805
1806int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1807{
1808 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1809}
1810EXPORT_SYMBOL_GPL(kvm_get_msr);
1811
1812int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1813{
1814 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1815}
1816EXPORT_SYMBOL_GPL(kvm_set_msr);
1817
1818static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
1819{
1820 int err = vcpu->run->msr.error;
1821 if (!err) {
1822 kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
1823 kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
1824 }
1825
1826 return static_call(kvm_x86_complete_emulated_msr)(vcpu, err);
1827}
1828
1829static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
1830{
1831 return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
1832}
1833
1834static u64 kvm_msr_reason(int r)
1835{
1836 switch (r) {
1837 case KVM_MSR_RET_INVALID:
1838 return KVM_MSR_EXIT_REASON_UNKNOWN;
1839 case KVM_MSR_RET_FILTERED:
1840 return KVM_MSR_EXIT_REASON_FILTER;
1841 default:
1842 return KVM_MSR_EXIT_REASON_INVAL;
1843 }
1844}
1845
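/*
 * Set up a KVM_EXIT_X86_RDMSR/WRMSR exit for an MSR access that KVM could
 * not handle.  Returns 1 if the exit to userspace was armed, 0 if userspace
 * did not ask (via its user_space_msr_mask) to see this class of failure.
 */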
1846static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
1847 u32 exit_reason, u64 data,
1848 int (*completion)(struct kvm_vcpu *vcpu),
1849 int r)
1850{
1851 u64 msr_reason = kvm_msr_reason(r);
1852
1853
1854 if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
1855 return 0;
1856
1857 vcpu->run->exit_reason = exit_reason;
1858 vcpu->run->msr.error = 0;
1859 memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
1860 vcpu->run->msr.reason = msr_reason;
1861 vcpu->run->msr.index = index;
1862 vcpu->run->msr.data = data;
1863 vcpu->arch.complete_userspace_io = completion;
1864
1865 return 1;
1866}
1867
1868static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
1869{
1870 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
1871 complete_emulated_rdmsr, r);
1872}
1873
1874static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
1875{
1876 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
1877 complete_emulated_wrmsr, r);
1878}
1879
1880int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1881{
1882 u32 ecx = kvm_rcx_read(vcpu);
1883 u64 data;
1884 int r;
1885
1886 r = kvm_get_msr(vcpu, ecx, &data);
1887
1888
1889 if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
1890
1891 return 0;
1892 }
1893
1894 if (!r) {
1895 trace_kvm_msr_read(ecx, data);
1896
1897 kvm_rax_write(vcpu, data & -1u);
1898 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1899 } else {
1900 trace_kvm_msr_read_ex(ecx);
1901 }
1902
1903 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1904}
1905EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1906
1907int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1908{
1909 u32 ecx = kvm_rcx_read(vcpu);
1910 u64 data = kvm_read_edx_eax(vcpu);
1911 int r;
1912
1913 r = kvm_set_msr(vcpu, ecx, data);
1914
1915
1916 if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
1917
1918 return 0;
1919
1920
1921 if (r < 0)
1922 return r;
1923
1924 if (!r)
1925 trace_kvm_msr_write(ecx, data);
1926 else
1927 trace_kvm_msr_write_ex(ecx, data);
1928
1929 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1930}
1931EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1932
1933int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
1934{
1935 return kvm_skip_emulated_instruction(vcpu);
1936}
1937EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
1938
1939int kvm_emulate_invd(struct kvm_vcpu *vcpu)
1940{
1941
1942 return kvm_emulate_as_nop(vcpu);
1943}
1944EXPORT_SYMBOL_GPL(kvm_emulate_invd);
1945
1946int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
1947{
1948 pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
1949 return kvm_emulate_as_nop(vcpu);
1950}
1951EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
1952
1953int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
1954{
1955 kvm_queue_exception(vcpu, UD_VECTOR);
1956 return 1;
1957}
1958EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
1959
1960int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
1961{
1962 pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
1963 return kvm_emulate_as_nop(vcpu);
1964}
1965EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
1966
1967static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
1968{
1969 xfer_to_guest_mode_prepare();
1970 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
1971 xfer_to_guest_mode_work_pending();
1972}
1973
/*
 * Fast path for writes to the x2APIC ICR: only fixed-mode, physical-
 * destination, non-broadcast, non-shorthand IPIs are handled here; anything
 * else falls back to the full exit path.
 */
1981static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
1982{
1983 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
1984 return 1;
1985
1986 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
1987 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
1988 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
1989 ((u32)(data >> 32) != X2APIC_BROADCAST)) {
1990
1991 data &= ~(1 << 12);
1992 kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
1993 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
1994 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
1995 trace_kvm_apic_write(APIC_ICR, (u32)data);
1996 return 0;
1997 }
1998
1999 return 1;
2000}
2001
2002static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
2003{
2004 if (!kvm_can_use_hv_timer(vcpu))
2005 return 1;
2006
2007 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2008 return 0;
2009}
2010
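/*
 * WRMSR fast path, executed with interrupts disabled and before the full
 * exit handler: handles x2APIC ICR writes for fixed-mode physical-destination
 * IPIs, and TSC-deadline writes when the hypervisor timer can be used.
 */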
2011fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
2012{
2013 u32 msr = kvm_rcx_read(vcpu);
2014 u64 data;
2015 fastpath_t ret = EXIT_FASTPATH_NONE;
2016
2017 switch (msr) {
2018 case APIC_BASE_MSR + (APIC_ICR >> 4):
2019 data = kvm_read_edx_eax(vcpu);
2020 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
2021 kvm_skip_emulated_instruction(vcpu);
2022 ret = EXIT_FASTPATH_EXIT_HANDLED;
2023 }
2024 break;
2025 case MSR_IA32_TSC_DEADLINE:
2026 data = kvm_read_edx_eax(vcpu);
2027 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
2028 kvm_skip_emulated_instruction(vcpu);
2029 ret = EXIT_FASTPATH_REENTER_GUEST;
2030 }
2031 break;
2032 default:
2033 break;
2034 }
2035
2036 if (ret != EXIT_FASTPATH_NONE)
2037 trace_kvm_msr_write(msr, data);
2038
2039 return ret;
2040}
2041EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
2042
2043
2044
2045
2046static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2047{
2048 return kvm_get_msr_ignored_check(vcpu, index, data, true);
2049}
2050
2051static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2052{
2053 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
2054}
2055
2056#ifdef CONFIG_X86_64
2057struct pvclock_clock {
2058 int vclock_mode;
2059 u64 cycle_last;
2060 u64 mask;
2061 u32 mult;
2062 u32 shift;
2063 u64 base_cycles;
2064 u64 offset;
2065};
2066
2067struct pvclock_gtod_data {
2068 seqcount_t seq;
2069
2070 struct pvclock_clock clock;
2071 struct pvclock_clock raw_clock;
2072
2073 ktime_t offs_boot;
2074 u64 wall_time_sec;
2075};
2076
2077static struct pvclock_gtod_data pvclock_gtod_data;
2078
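/*
 * Copy the timekeeper's monotonic and raw clock parameters (and boot offset)
 * into pvclock_gtod_data under the seqcount, for use by the kvmclock /
 * masterclock code.
 */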
2079static void update_pvclock_gtod(struct timekeeper *tk)
2080{
2081 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
2082
2083 write_seqcount_begin(&vdata->seq);
2084
2085
2086 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
2087 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
2088 vdata->clock.mask = tk->tkr_mono.mask;
2089 vdata->clock.mult = tk->tkr_mono.mult;
2090 vdata->clock.shift = tk->tkr_mono.shift;
2091 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
2092 vdata->clock.offset = tk->tkr_mono.base;
2093
2094 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
2095 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
2096 vdata->raw_clock.mask = tk->tkr_raw.mask;
2097 vdata->raw_clock.mult = tk->tkr_raw.mult;
2098 vdata->raw_clock.shift = tk->tkr_raw.shift;
2099 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
2100 vdata->raw_clock.offset = tk->tkr_raw.base;
2101
2102 vdata->wall_time_sec = tk->xtime_sec;
2103
2104 vdata->offs_boot = tk->offs_boot;
2105
2106 write_seqcount_end(&vdata->seq);
2107}
2108
2109static s64 get_kvmclock_base_ns(void)
2110{
2111
2112 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
2113}
2114#else
2115static s64 get_kvmclock_base_ns(void)
2116{
2117
2118 return ktime_get_boottime_ns();
2119}
2120#endif
2121
2122void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
2123{
2124 int version;
2125 int r;
2126 struct pvclock_wall_clock wc;
2127 u32 wc_sec_hi;
2128 u64 wall_nsec;
2129
2130 if (!wall_clock)
2131 return;
2132
2133 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
2134 if (r)
2135 return;
2136
2137 if (version & 1)
2138 ++version;
2139
2140 ++version;
2141
2142 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
2143 return;
2144
2145
2146
2147
2148
2149
2150 wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
2151
2152 wc.nsec = do_div(wall_nsec, 1000000000);
2153 wc.sec = (u32)wall_nsec;
2154 wc.version = version;
2155
2156 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
2157
2158 if (sec_hi_ofs) {
2159 wc_sec_hi = wall_nsec >> 32;
2160 kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
2161 &wc_sec_hi, sizeof(wc_sec_hi));
2162 }
2163
2164 version++;
2165 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
2166}
2167
2168static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
2169 bool old_msr, bool host_initiated)
2170{
2171 struct kvm_arch *ka = &vcpu->kvm->arch;
2172
2173 if (vcpu->vcpu_id == 0 && !host_initiated) {
2174 if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
2175 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2176
2177 ka->boot_vcpu_runs_old_kvmclock = old_msr;
2178 }
2179
2180 vcpu->arch.time = system_time;
2181 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2182
2183
2184 vcpu->arch.pv_time_enabled = false;
2185 if (!(system_time & 1))
2186 return;
2187
2188 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2189 &vcpu->arch.pv_time, system_time & ~1ULL,
2190 sizeof(struct pvclock_vcpu_time_info)))
2191 vcpu->arch.pv_time_enabled = true;
2192
2193 return;
2194}
2195
2196static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
2197{
2198 do_shl32_div32(dividend, divisor);
2199 return dividend;
2200}
2201
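/*
 * Compute pvclock-style scaling factors (shift, multiplier) such that
 * scaled_hz ~= base_hz * multiplier * 2^shift / 2^32, keeping the
 * multiplier in 32 bits with maximum precision.
 */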
2202static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
2203 s8 *pshift, u32 *pmultiplier)
2204{
2205 uint64_t scaled64;
2206 int32_t shift = 0;
2207 uint64_t tps64;
2208 uint32_t tps32;
2209
2210 tps64 = base_hz;
2211 scaled64 = scaled_hz;
2212 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
2213 tps64 >>= 1;
2214 shift--;
2215 }
2216
2217 tps32 = (uint32_t)tps64;
2218 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
2219 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
2220 scaled64 >>= 1;
2221 else
2222 tps32 <<= 1;
2223 shift++;
2224 }
2225
2226 *pshift = shift;
2227 *pmultiplier = div_frac(scaled64, tps32);
2228}
2229
2230#ifdef CONFIG_X86_64
2231static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
2232#endif
2233
2234static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
2235static unsigned long max_tsc_khz;
2236
2237static u32 adjust_tsc_khz(u32 khz, s32 ppm)
2238{
2239 u64 v = (u64)khz * (1000000 + ppm);
2240 do_div(v, 1000000);
2241 return v;
2242}
2243
2244static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier);
2245
2246static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
2247{
2248 u64 ratio;
2249
2250
2251 if (!scale) {
2252 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2253 return 0;
2254 }
2255
2256
2257 if (!kvm_has_tsc_control) {
2258 if (user_tsc_khz > tsc_khz) {
2259 vcpu->arch.tsc_catchup = 1;
2260 vcpu->arch.tsc_always_catchup = 1;
2261 return 0;
2262 } else {
2263 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
2264 return -1;
2265 }
2266 }
2267
2268
2269 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
2270 user_tsc_khz, tsc_khz);
2271
2272 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
2273 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
2274 user_tsc_khz);
2275 return -1;
2276 }
2277
2278 kvm_vcpu_write_tsc_multiplier(vcpu, ratio);
2279 return 0;
2280}
2281
2282static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
2283{
2284 u32 thresh_lo, thresh_hi;
2285 int use_scaling = 0;
2286
2287
2288 if (user_tsc_khz == 0) {
2289
2290 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2291 return -1;
2292 }
2293
2294
2295 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
2296 &vcpu->arch.virtual_tsc_shift,
2297 &vcpu->arch.virtual_tsc_mult);
2298 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
2299
2300
2301
2302
2303
2304
2305
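	/*
	 * Only resort to TSC scaling (or catchup) when the requested rate
	 * falls outside the tolerated drift around the host TSC frequency.
	 */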
2306 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
2307 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
2308 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
2309 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
2310 use_scaling = 1;
2311 }
2312 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
2313}
2314
2315static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
2316{
2317 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
2318 vcpu->arch.virtual_tsc_mult,
2319 vcpu->arch.virtual_tsc_shift);
2320 tsc += vcpu->arch.this_tsc_write;
2321 return tsc;
2322}
2323
2324static inline int gtod_is_based_on_tsc(int mode)
2325{
2326 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
2327}
2328
2329static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
2330{
2331#ifdef CONFIG_X86_64
2332 bool vcpus_matched;
2333 struct kvm_arch *ka = &vcpu->kvm->arch;
2334 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2335
2336 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2337 atomic_read(&vcpu->kvm->online_vcpus));
2338
2339
2340
2341
2342
2343
2344
2345
2346
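	/*
	 * Request a masterclock update when the masterclock is already in
	 * use, or once every vCPU has written a matching TSC while the host
	 * clocksource is TSC based; the request is serviced on the next
	 * vCPU entry.
	 */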
2347 if (ka->use_master_clock ||
2348 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
2349 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2350
2351 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
2352 atomic_read(&vcpu->kvm->online_vcpus),
2353 ka->use_master_clock, gtod->clock.vclock_mode);
2354#endif
2355}
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
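/*
 * Scale a TSC value by a fixed-point ratio with
 * kvm_tsc_scaling_ratio_frac_bits fractional bits.
 */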
2367static inline u64 __scale_tsc(u64 ratio, u64 tsc)
2368{
2369 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
2370}
2371
2372u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio)
2373{
2374 u64 _tsc = tsc;
2375
2376 if (ratio != kvm_default_tsc_scaling_ratio)
2377 _tsc = __scale_tsc(ratio, tsc);
2378
2379 return _tsc;
2380}
2381EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2382
2383static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2384{
2385 u64 tsc;
2386
2387 tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio);
2388
2389 return target_tsc - tsc;
2390}
2391
2392u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2393{
2394 return vcpu->arch.l1_tsc_offset +
2395 kvm_scale_tsc(vcpu, host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
2396}
2397EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2398
2399u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
2400{
2401 u64 nested_offset;
2402
2403 if (l2_multiplier == kvm_default_tsc_scaling_ratio)
2404 nested_offset = l1_offset;
2405 else
2406 nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
2407 kvm_tsc_scaling_ratio_frac_bits);
2408
2409 nested_offset += l2_offset;
2410 return nested_offset;
2411}
2412EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
2413
2414u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
2415{
2416 if (l2_multiplier != kvm_default_tsc_scaling_ratio)
2417 return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
2418 kvm_tsc_scaling_ratio_frac_bits);
2419
2420 return l1_multiplier;
2421}
2422EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
2423
2424static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
2425{
2426 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2427 vcpu->arch.l1_tsc_offset,
2428 l1_offset);
2429
2430 vcpu->arch.l1_tsc_offset = l1_offset;
2431
2432
2433
2434
2435
2436
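	/*
	 * When the vCPU is running L2, the offset programmed into hardware
	 * must fold in L2's own TSC offset and multiplier; otherwise L1's
	 * offset is used as-is.
	 */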
2437 if (is_guest_mode(vcpu))
2438 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2439 l1_offset,
2440 static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
2441 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2442 else
2443 vcpu->arch.tsc_offset = l1_offset;
2444
2445 static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
2446}
2447
2448static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
2449{
2450 vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
2451
2452
2453 if (is_guest_mode(vcpu))
2454 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2455 l1_multiplier,
2456 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2457 else
2458 vcpu->arch.tsc_scaling_ratio = l1_multiplier;
2459
2460 if (kvm_has_tsc_control)
2461 static_call(kvm_x86_write_tsc_multiplier)(
2462 vcpu, vcpu->arch.tsc_scaling_ratio);
2463}
2464
2465static inline bool kvm_check_tsc_unstable(void)
2466{
2467#ifdef CONFIG_X86_64
2468
2469
2470
2471
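	/*
	 * The Hyper-V TSC page clocksource compensates for host-side TSC
	 * changes, so the TSC can be considered stable when the vDSO clock
	 * is backed by it.
	 */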
2472 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2473 return false;
2474#endif
2475 return check_tsc_unstable();
2476}
2477
2478
2479
2480
2481
2482
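/*
 * Record a guest TSC write, program the new L1 offset and track whether the
 * write matched the current TSC generation so the masterclock logic can tell
 * when all vCPUs are in sync.  Caller must hold tsc_write_lock.
 */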
2483static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
2484 u64 ns, bool matched)
2485{
2486 struct kvm *kvm = vcpu->kvm;
2487
2488 lockdep_assert_held(&kvm->arch.tsc_write_lock);
2489
2490
2491
2492
2493
2494 kvm->arch.last_tsc_nsec = ns;
2495 kvm->arch.last_tsc_write = tsc;
2496 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2497 kvm->arch.last_tsc_offset = offset;
2498
2499 vcpu->arch.last_guest_tsc = tsc;
2500
2501 kvm_vcpu_write_tsc_offset(vcpu, offset);
2502
2503 if (!matched) {
2504
2505
2506
2507
2508
2509
2510
2511
2512
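		/*
		 * The write did not match the previous one: start a new TSC
		 * generation and reset the matched-vCPU count.
		 */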
2513 kvm->arch.cur_tsc_generation++;
2514 kvm->arch.cur_tsc_nsec = ns;
2515 kvm->arch.cur_tsc_write = tsc;
2516 kvm->arch.cur_tsc_offset = offset;
2517 kvm->arch.nr_vcpus_matched_tsc = 0;
2518 } else if (vcpu->arch.this_tsc_generation != kvm->arch.cur_tsc_generation) {
2519 kvm->arch.nr_vcpus_matched_tsc++;
2520 }
2521
2522
2523 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2524 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2525 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2526
2527 kvm_track_tsc_matching(vcpu);
2528}
2529
2530static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
2531{
2532 struct kvm *kvm = vcpu->kvm;
2533 u64 offset, ns, elapsed;
2534 unsigned long flags;
2535 bool matched = false;
2536 bool synchronizing = false;
2537
2538 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2539 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2540 ns = get_kvmclock_base_ns();
2541 elapsed = ns - kvm->arch.last_tsc_nsec;
2542
2543 if (vcpu->arch.virtual_tsc_khz) {
2544 if (data == 0) {
2545
2546
2547
2548
2549
2550 synchronizing = true;
2551 } else {
2552 u64 tsc_exp = kvm->arch.last_tsc_write +
2553 nsec_to_cycles(vcpu, elapsed);
2554 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2555
2556
2557
2558
2559
2560 synchronizing = data < tsc_exp + tsc_hz &&
2561 data + tsc_hz > tsc_exp;
2562 }
2563 }
2564
2565
2566
2567
2568
2569
2570
2571 if (synchronizing &&
2572 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2573 if (!kvm_check_tsc_unstable()) {
2574 offset = kvm->arch.cur_tsc_offset;
2575 } else {
2576 u64 delta = nsec_to_cycles(vcpu, elapsed);
2577 data += delta;
2578 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2579 }
2580 matched = true;
2581 }
2582
2583 __kvm_synchronize_tsc(vcpu, offset, data, ns, matched);
2584 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2585}
2586
2587static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2588 s64 adjustment)
2589{
2590 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2591 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2592}
2593
2594static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2595{
2596 if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2597 WARN_ON(adjustment < 0);
2598 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment,
2599 vcpu->arch.l1_tsc_scaling_ratio);
2600 adjust_tsc_offset_guest(vcpu, adjustment);
2601}
2602
2603#ifdef CONFIG_X86_64
2604
2605static u64 read_tsc(void)
2606{
2607 u64 ret = (u64)rdtsc_ordered();
2608 u64 last = pvclock_gtod_data.clock.cycle_last;
2609
2610 if (likely(ret >= last))
2611 return ret;
2612
2613
2614
2615
2616
2617
2618
2619
2620
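	/*
	 * The TSC read came back lower than the last recorded cycle count
	 * (e.g. observed across CPUs), so return the saved value to keep the
	 * clock monotonic.  The empty asm is intended to keep the compiler
	 * from collapsing this fallback into a conditional move.
	 */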
2621 asm volatile ("");
2622 return last;
2623}
2624
2625static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2626 int *mode)
2627{
2628 long v;
2629 u64 tsc_pg_val;
2630
2631 switch (clock->vclock_mode) {
2632 case VDSO_CLOCKMODE_HVCLOCK:
2633 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2634 tsc_timestamp);
2635 if (tsc_pg_val != U64_MAX) {
2636
2637 *mode = VDSO_CLOCKMODE_HVCLOCK;
2638 v = (tsc_pg_val - clock->cycle_last) &
2639 clock->mask;
2640 } else {
2641
2642 *mode = VDSO_CLOCKMODE_NONE;
2643 }
2644 break;
2645 case VDSO_CLOCKMODE_TSC:
2646 *mode = VDSO_CLOCKMODE_TSC;
2647 *tsc_timestamp = read_tsc();
2648 v = (*tsc_timestamp - clock->cycle_last) &
2649 clock->mask;
2650 break;
2651 default:
2652 *mode = VDSO_CLOCKMODE_NONE;
2653 }
2654
2655 if (*mode == VDSO_CLOCKMODE_NONE)
2656 *tsc_timestamp = v = 0;
2657
2658 return v * clock->mult;
2659}
2660
2661static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
2662{
2663 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2664 unsigned long seq;
2665 int mode;
2666 u64 ns;
2667
2668 do {
2669		seq = read_seqcount_begin(&gtod->seq);
2670 ns = gtod->raw_clock.base_cycles;
2671		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
2672 ns >>= gtod->raw_clock.shift;
2673 ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
2674	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2675 *t = ns;
2676
2677 return mode;
2678}
2679
2680static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2681{
2682 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2683 unsigned long seq;
2684 int mode;
2685 u64 ns;
2686
2687 do {
2688		seq = read_seqcount_begin(&gtod->seq);
2689 ts->tv_sec = gtod->wall_time_sec;
2690 ns = gtod->clock.base_cycles;
2691		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
2692 ns >>= gtod->clock.shift;
2693	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2694
2695 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2696 ts->tv_nsec = ns;
2697
2698 return mode;
2699}
2700
2701
2702static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2703{
2704
2705 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2706 return false;
2707
2708 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2709 tsc_timestamp));
2710}
2711
2712
2713static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2714 u64 *tsc_timestamp)
2715{
2716
2717 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2718 return false;
2719
2720 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2721}
2722#endif
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
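/*
 * Decide whether this VM may use a "master clock": one (kernel_ns, tsc)
 * snapshot shared by all vCPUs.  This is only enabled when the host
 * clocksource is TSC based and every vCPU has written a matching TSC, and it
 * stays off if a backwards TSC was ever observed or the boot vCPU uses the
 * legacy kvmclock MSR.
 */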
2765static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2766{
2767#ifdef CONFIG_X86_64
2768 struct kvm_arch *ka = &kvm->arch;
2769 int vclock_mode;
2770 bool host_tsc_clocksource, vcpus_matched;
2771
2772 lockdep_assert_held(&kvm->arch.tsc_write_lock);
2773 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2774 atomic_read(&kvm->online_vcpus));
2775
2776
2777
2778
2779
2780 host_tsc_clocksource = kvm_get_time_and_clockread(
2781 &ka->master_kernel_ns,
2782 &ka->master_cycle_now);
2783
2784 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2785 && !ka->backwards_tsc_observed
2786 && !ka->boot_vcpu_runs_old_kvmclock;
2787
2788 if (ka->use_master_clock)
2789 atomic_set(&kvm_guest_has_master_clock, 1);
2790
2791 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2792 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2793 vcpus_matched);
2794#endif
2795}
2796
2797static void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2798{
2799 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2800}
2801
2802static void __kvm_start_pvclock_update(struct kvm *kvm)
2803{
2804 raw_spin_lock_irq(&kvm->arch.tsc_write_lock);
2805 write_seqcount_begin(&kvm->arch.pvclock_sc);
2806}
2807
2808static void kvm_start_pvclock_update(struct kvm *kvm)
2809{
2810 kvm_make_mclock_inprogress_request(kvm);
2811
2812
2813 __kvm_start_pvclock_update(kvm);
2814}
2815
2816static void kvm_end_pvclock_update(struct kvm *kvm)
2817{
2818 struct kvm_arch *ka = &kvm->arch;
2819 struct kvm_vcpu *vcpu;
2820 int i;
2821
2822 write_seqcount_end(&ka->pvclock_sc);
2823 raw_spin_unlock_irq(&ka->tsc_write_lock);
2824 kvm_for_each_vcpu(i, vcpu, kvm)
2825 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2826
2827
2828 kvm_for_each_vcpu(i, vcpu, kvm)
2829 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2830}
2831
2832static void kvm_update_masterclock(struct kvm *kvm)
2833{
2834 kvm_hv_invalidate_tsc_page(kvm);
2835 kvm_start_pvclock_update(kvm);
2836 pvclock_update_vm_gtod_copy(kvm);
2837 kvm_end_pvclock_update(kvm);
2838}
2839
2840
2841static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
2842{
2843 struct kvm_arch *ka = &kvm->arch;
2844 struct pvclock_vcpu_time_info hv_clock;
2845
2846
2847 get_cpu();
2848
2849 data->flags = 0;
2850 if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
2851#ifdef CONFIG_X86_64
2852 struct timespec64 ts;
2853
2854 if (kvm_get_walltime_and_clockread(&ts, &data->host_tsc)) {
2855 data->realtime = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
2856 data->flags |= KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC;
2857 } else
2858#endif
2859 data->host_tsc = rdtsc();
2860
2861 data->flags |= KVM_CLOCK_TSC_STABLE;
2862 hv_clock.tsc_timestamp = ka->master_cycle_now;
2863 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2864 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2865 &hv_clock.tsc_shift,
2866 &hv_clock.tsc_to_system_mul);
2867 data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
2868 } else {
2869 data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
2870 }
2871
2872 put_cpu();
2873}
2874
2875static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
2876{
2877 struct kvm_arch *ka = &kvm->arch;
2878 unsigned seq;
2879
2880 do {
2881 seq = read_seqcount_begin(&ka->pvclock_sc);
2882 __get_kvmclock(kvm, data);
2883 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
2884}
2885
2886u64 get_kvmclock_ns(struct kvm *kvm)
2887{
2888 struct kvm_clock_data data;
2889
2890 get_kvmclock(kvm, &data);
2891 return data.clock;
2892}
2893
2894static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
2895 struct gfn_to_hva_cache *cache,
2896 unsigned int offset)
2897{
2898 struct kvm_vcpu_arch *vcpu = &v->arch;
2899 struct pvclock_vcpu_time_info guest_hv_clock;
2900
2901 if (unlikely(kvm_read_guest_offset_cached(v->kvm, cache,
2902 &guest_hv_clock, offset, sizeof(guest_hv_clock))))
2903 return;
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
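	/*
	 * The guest pvclock area uses its version field as a sequence
	 * counter: bump it to an odd value before rewriting the payload,
	 * write the new time data, then bump it again so the guest sees an
	 * even, consistent snapshot.  The write barriers order the version
	 * updates against the payload writes.
	 */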
2919 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2920
2921 if (guest_hv_clock.version & 1)
2922 ++guest_hv_clock.version;
2923
2924 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2925 kvm_write_guest_offset_cached(v->kvm, cache,
2926 &vcpu->hv_clock, offset,
2927 sizeof(vcpu->hv_clock.version));
2928
2929 smp_wmb();
2930
2931
2932 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2933
2934 if (vcpu->pvclock_set_guest_stopped_request) {
2935 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2936 vcpu->pvclock_set_guest_stopped_request = false;
2937 }
2938
2939 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2940
2941 kvm_write_guest_offset_cached(v->kvm, cache,
2942 &vcpu->hv_clock, offset,
2943 sizeof(vcpu->hv_clock));
2944
2945 smp_wmb();
2946
2947 vcpu->hv_clock.version++;
2948 kvm_write_guest_offset_cached(v->kvm, cache,
2949 &vcpu->hv_clock, offset,
2950 sizeof(vcpu->hv_clock.version));
2951}
2952
2953static int kvm_guest_time_update(struct kvm_vcpu *v)
2954{
2955 unsigned long flags, tgt_tsc_khz;
2956 unsigned seq;
2957 struct kvm_vcpu_arch *vcpu = &v->arch;
2958 struct kvm_arch *ka = &v->kvm->arch;
2959 s64 kernel_ns;
2960 u64 tsc_timestamp, host_tsc;
2961 u8 pvclock_flags;
2962 bool use_master_clock;
2963
2964 kernel_ns = 0;
2965 host_tsc = 0;
2966
2967
2968
2969
2970
2971 do {
2972 seq = read_seqcount_begin(&ka->pvclock_sc);
2973 use_master_clock = ka->use_master_clock;
2974 if (use_master_clock) {
2975 host_tsc = ka->master_cycle_now;
2976 kernel_ns = ka->master_kernel_ns;
2977 }
2978 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
2979
2980
2981 local_irq_save(flags);
2982 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2983 if (unlikely(tgt_tsc_khz == 0)) {
2984 local_irq_restore(flags);
2985 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2986 return 1;
2987 }
2988 if (!use_master_clock) {
2989 host_tsc = rdtsc();
2990 kernel_ns = get_kvmclock_base_ns();
2991 }
2992
2993 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
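	/*
	 * In catchup mode the guest's virtual TSC runs at a higher rate than
	 * the host TSC provides; if the value the guest should see has run
	 * ahead of the scaled host TSC, bump the offset so the guest TSC
	 * never appears to go backwards.
	 */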
3005 if (vcpu->tsc_catchup) {
3006 u64 tsc = compute_guest_tsc(v, kernel_ns);
3007 if (tsc > tsc_timestamp) {
3008 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
3009 tsc_timestamp = tsc;
3010 }
3011 }
3012
3013 local_irq_restore(flags);
3014
3015
3016
3017 if (kvm_has_tsc_control)
3018 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz,
3019 v->arch.l1_tsc_scaling_ratio);
3020
3021 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
3022 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
3023 &vcpu->hv_clock.tsc_shift,
3024 &vcpu->hv_clock.tsc_to_system_mul);
3025 vcpu->hw_tsc_khz = tgt_tsc_khz;
3026 }
3027
3028 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
3029 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
3030 vcpu->last_guest_tsc = tsc_timestamp;
3031
3032
3033 pvclock_flags = 0;
3034 if (use_master_clock)
3035 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
3036
3037 vcpu->hv_clock.flags = pvclock_flags;
3038
3039 if (vcpu->pv_time_enabled)
3040 kvm_setup_pvclock_page(v, &vcpu->pv_time, 0);
3041 if (vcpu->xen.vcpu_info_set)
3042 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
3043 offsetof(struct compat_vcpu_info, time));
3044 if (vcpu->xen.vcpu_time_info_set)
3045 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
3046 if (!v->vcpu_idx)
3047 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
3048 return 0;
3049}
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
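/*
 * A kvmclock update on a single vCPU (e.g. after migration to another CPU)
 * is followed up by a delayed, VM-wide update so that all vCPUs end up with
 * coherent kvmclock values.
 */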
3065#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
3066
3067static void kvmclock_update_fn(struct work_struct *work)
3068{
3069 int i;
3070 struct delayed_work *dwork = to_delayed_work(work);
3071 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3072 kvmclock_update_work);
3073 struct kvm *kvm = container_of(ka, struct kvm, arch);
3074 struct kvm_vcpu *vcpu;
3075
3076 kvm_for_each_vcpu(i, vcpu, kvm) {
3077 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3078 kvm_vcpu_kick(vcpu);
3079 }
3080}
3081
3082static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
3083{
3084 struct kvm *kvm = v->kvm;
3085
3086 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3087 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
3088 KVMCLOCK_UPDATE_DELAY);
3089}
3090
3091#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
3092
3093static void kvmclock_sync_fn(struct work_struct *work)
3094{
3095 struct delayed_work *dwork = to_delayed_work(work);
3096 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3097 kvmclock_sync_work);
3098 struct kvm *kvm = container_of(ka, struct kvm, arch);
3099
3100 if (!kvmclock_periodic_sync)
3101 return;
3102
3103 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
3104 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
3105 KVMCLOCK_SYNC_PERIOD);
3106}
3107
3108
3109
3110
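/*
 * On AMD and Hygon parts, writes to the IA32_MCi_STATUS banks are only
 * allowed once the guest has set the McStatusWrEn bit (bit 18) in
 * MSR_K7_HWCR.
 */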
3111static bool can_set_mci_status(struct kvm_vcpu *vcpu)
3112{
3113
3114 if (guest_cpuid_is_amd_or_hygon(vcpu))
3115 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
3116
3117 return false;
3118}
3119
3120static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3121{
3122 u64 mcg_cap = vcpu->arch.mcg_cap;
3123 unsigned bank_num = mcg_cap & 0xff;
3124 u32 msr = msr_info->index;
3125 u64 data = msr_info->data;
3126
3127 switch (msr) {
3128 case MSR_IA32_MCG_STATUS:
3129 vcpu->arch.mcg_status = data;
3130 break;
3131 case MSR_IA32_MCG_CTL:
3132 if (!(mcg_cap & MCG_CTL_P) &&
3133 (data || !msr_info->host_initiated))
3134 return 1;
3135 if (data != 0 && data != ~(u64)0)
3136 return 1;
3137 vcpu->arch.mcg_ctl = data;
3138 break;
3139 default:
3140 if (msr >= MSR_IA32_MC0_CTL &&
3141 msr < MSR_IA32_MCx_CTL(bank_num)) {
3142 u32 offset = array_index_nospec(
3143 msr - MSR_IA32_MC0_CTL,
3144 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3145
3146
3147
3148
3149
3150
3151 if ((offset & 0x3) == 0 &&
3152 data != 0 && (data | (1 << 10)) != ~(u64)0)
3153 return -1;
3154
3155
3156 if (!msr_info->host_initiated &&
3157 (offset & 0x3) == 1 && data != 0) {
3158 if (!can_set_mci_status(vcpu))
3159 return -1;
3160 }
3161
3162 vcpu->arch.mce_banks[offset] = data;
3163 break;
3164 }
3165 return 1;
3166 }
3167 return 0;
3168}
3169
3170static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
3171{
3172 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
3173
3174 return (vcpu->arch.apf.msr_en_val & mask) == mask;
3175}
3176
3177static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
3178{
3179 gpa_t gpa = data & ~0x3f;
3180
3181
3182 if (data & 0x30)
3183 return 1;
3184
3185 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
3186 (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
3187 return 1;
3188
3189 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
3190 (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
3191 return 1;
3192
3193 if (!lapic_in_kernel(vcpu))
3194 return data ? 1 : 0;
3195
3196 vcpu->arch.apf.msr_en_val = data;
3197
3198 if (!kvm_pv_async_pf_enabled(vcpu)) {
3199 kvm_clear_async_pf_completion_queue(vcpu);
3200 kvm_async_pf_hash_reset(vcpu);
3201 return 0;
3202 }
3203
3204 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
3205 sizeof(u64)))
3206 return 1;
3207
3208 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
3209 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
3210
3211 kvm_async_pf_wakeup_all(vcpu);
3212
3213 return 0;
3214}
3215
3216static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
3217{
3218
3219 if (data >> 8)
3220 return 1;
3221
3222 if (!lapic_in_kernel(vcpu))
3223 return 1;
3224
3225 vcpu->arch.apf.msr_int_val = data;
3226
3227 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
3228
3229 return 0;
3230}
3231
3232static void kvmclock_reset(struct kvm_vcpu *vcpu)
3233{
3234 vcpu->arch.pv_time_enabled = false;
3235 vcpu->arch.time = 0;
3236}
3237
3238static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
3239{
3240 ++vcpu->stat.tlb_flush;
3241 static_call(kvm_x86_tlb_flush_all)(vcpu);
3242}
3243
3244static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
3245{
3246 ++vcpu->stat.tlb_flush;
3247
3248 if (!tdp_enabled) {
3249
3250
3251
3252
3253
3254
3255 kvm_mmu_sync_roots(vcpu);
3256 kvm_mmu_sync_prev_roots(vcpu);
3257 }
3258
3259 static_call(kvm_x86_tlb_flush_guest)(vcpu);
3260}
3261
3262
3263static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
3264{
3265 ++vcpu->stat.tlb_flush;
3266 static_call(kvm_x86_tlb_flush_current)(vcpu);
3267}
3268
3269
3270
3271
3272
3273
3274
3275void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
3276{
3277 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
3278 kvm_vcpu_flush_tlb_current(vcpu);
3279
3280 if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
3281 kvm_vcpu_flush_tlb_guest(vcpu);
3282}
3283EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
3284
3285static void record_steal_time(struct kvm_vcpu *vcpu)
3286{
3287 struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
3288 struct kvm_steal_time __user *st;
3289 struct kvm_memslots *slots;
3290 u64 steal;
3291 u32 version;
3292
3293 if (kvm_xen_msr_enabled(vcpu->kvm)) {
3294 kvm_xen_runstate_set_running(vcpu);
3295 return;
3296 }
3297
3298 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3299 return;
3300
3301 if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
3302 return;
3303
3304 slots = kvm_memslots(vcpu->kvm);
3305
3306 if (unlikely(slots->generation != ghc->generation ||
3307 kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
3308 gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
3309
3310
3311 BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
3312
3313 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
3314 kvm_is_error_hva(ghc->hva) || !ghc->memslot)
3315 return;
3316 }
3317
3318 st = (struct kvm_steal_time __user *)ghc->hva;
3319
3320
3321
3322
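	/*
	 * When the guest supports PV TLB flushing, atomically swap
	 * st->preempted with zero and flush the guest TLB if the old value
	 * had KVM_VCPU_FLUSH_TLB set, i.e. the guest deferred a flush for
	 * this preempted vCPU to the host.
	 */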
3323 if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
3324 u8 st_preempted = 0;
3325 int err = -EFAULT;
3326
3327 if (!user_access_begin(st, sizeof(*st)))
3328 return;
3329
3330 asm volatile("1: xchgb %0, %2\n"
3331 "xor %1, %1\n"
3332 "2:\n"
3333 _ASM_EXTABLE_UA(1b, 2b)
3334 : "+q" (st_preempted),
3335 "+&r" (err),
3336 "+m" (st->preempted));
3337 if (err)
3338 goto out;
3339
3340 user_access_end();
3341
3342 vcpu->arch.st.preempted = 0;
3343
3344 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
3345 st_preempted & KVM_VCPU_FLUSH_TLB);
3346 if (st_preempted & KVM_VCPU_FLUSH_TLB)
3347 kvm_vcpu_flush_tlb_guest(vcpu);
3348
3349 if (!user_access_begin(st, sizeof(*st)))
3350 goto dirty;
3351 } else {
3352 if (!user_access_begin(st, sizeof(*st)))
3353 return;
3354
3355 unsafe_put_user(0, &st->preempted, out);
3356 vcpu->arch.st.preempted = 0;
3357 }
3358
3359 unsafe_get_user(version, &st->version, out);
3360 if (version & 1)
3361 version += 1;
3362
3363 version += 1;
3364 unsafe_put_user(version, &st->version, out);
3365
3366 smp_wmb();
3367
3368 unsafe_get_user(steal, &st->steal, out);
3369 steal += current->sched_info.run_delay -
3370 vcpu->arch.st.last_steal;
3371 vcpu->arch.st.last_steal = current->sched_info.run_delay;
3372 unsafe_put_user(steal, &st->steal, out);
3373
3374 version += 1;
3375 unsafe_put_user(version, &st->version, out);
3376
3377 out:
3378 user_access_end();
3379 dirty:
3380 mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
3381}
3382
3383int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3384{
3385 bool pr = false;
3386 u32 msr = msr_info->index;
3387 u64 data = msr_info->data;
3388
3389 if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
3390 return kvm_xen_write_hypercall_page(vcpu, data);
3391
3392 switch (msr) {
3393 case MSR_AMD64_NB_CFG:
3394 case MSR_IA32_UCODE_WRITE:
3395 case MSR_VM_HSAVE_PA:
3396 case MSR_AMD64_PATCH_LOADER:
3397 case MSR_AMD64_BU_CFG2:
3398 case MSR_AMD64_DC_CFG:
3399 case MSR_F15H_EX_CFG:
3400 break;
3401
3402 case MSR_IA32_UCODE_REV:
3403 if (msr_info->host_initiated)
3404 vcpu->arch.microcode_version = data;
3405 break;
3406 case MSR_IA32_ARCH_CAPABILITIES:
3407 if (!msr_info->host_initiated)
3408 return 1;
3409 vcpu->arch.arch_capabilities = data;
3410 break;
3411 case MSR_IA32_PERF_CAPABILITIES: {
3412 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
3413
3414 if (!msr_info->host_initiated)
3415 return 1;
3416 if (kvm_get_msr_feature(&msr_ent))
3417 return 1;
3418 if (data & ~msr_ent.data)
3419 return 1;
3420
3421 vcpu->arch.perf_capabilities = data;
3422
3423 return 0;
3424 }
3425 case MSR_EFER:
3426 return set_efer(vcpu, msr_info);
3427 case MSR_K7_HWCR:
3428 data &= ~(u64)0x40;
3429 data &= ~(u64)0x100;
3430 data &= ~(u64)0x8;
3431
3432
3433 if (data == BIT_ULL(18)) {
3434 vcpu->arch.msr_hwcr = data;
3435 } else if (data != 0) {
3436 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
3437 data);
3438 return 1;
3439 }
3440 break;
3441 case MSR_FAM10H_MMIO_CONF_BASE:
3442 if (data != 0) {
3443 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
3444 "0x%llx\n", data);
3445 return 1;
3446 }
3447 break;
3448 case 0x200 ... 0x2ff:
3449 return kvm_mtrr_set_msr(vcpu, msr, data);
3450 case MSR_IA32_APICBASE:
3451 return kvm_set_apic_base(vcpu, msr_info);
3452 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3453 return kvm_x2apic_msr_write(vcpu, msr, data);
3454 case MSR_IA32_TSC_DEADLINE:
3455 kvm_set_lapic_tscdeadline_msr(vcpu, data);
3456 break;
3457 case MSR_IA32_TSC_ADJUST:
3458 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
3459 if (!msr_info->host_initiated) {
3460 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
3461 adjust_tsc_offset_guest(vcpu, adj);
3462
3463
3464
3465 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3466 }
3467 vcpu->arch.ia32_tsc_adjust_msr = data;
3468 }
3469 break;
3470 case MSR_IA32_MISC_ENABLE:
3471 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
3472 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
3473 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
3474 return 1;
3475 vcpu->arch.ia32_misc_enable_msr = data;
3476 kvm_update_cpuid_runtime(vcpu);
3477 } else {
3478 vcpu->arch.ia32_misc_enable_msr = data;
3479 }
3480 break;
3481 case MSR_IA32_SMBASE:
3482 if (!msr_info->host_initiated)
3483 return 1;
3484 vcpu->arch.smbase = data;
3485 break;
3486 case MSR_IA32_POWER_CTL:
3487 vcpu->arch.msr_ia32_power_ctl = data;
3488 break;
3489 case MSR_IA32_TSC:
3490 if (msr_info->host_initiated) {
3491 kvm_synchronize_tsc(vcpu, data);
3492 } else {
3493 u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
3494 adjust_tsc_offset_guest(vcpu, adj);
3495 vcpu->arch.ia32_tsc_adjust_msr += adj;
3496 }
3497 break;
3498 case MSR_IA32_XSS:
3499 if (!msr_info->host_initiated &&
3500 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3501 return 1;
3502
3503
3504
3505
3506
3507 if (data & ~supported_xss)
3508 return 1;
3509 vcpu->arch.ia32_xss = data;
3510 break;
3511 case MSR_SMI_COUNT:
3512 if (!msr_info->host_initiated)
3513 return 1;
3514 vcpu->arch.smi_count = data;
3515 break;
3516 case MSR_KVM_WALL_CLOCK_NEW:
3517 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3518 return 1;
3519
3520 vcpu->kvm->arch.wall_clock = data;
3521 kvm_write_wall_clock(vcpu->kvm, data, 0);
3522 break;
3523 case MSR_KVM_WALL_CLOCK:
3524 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3525 return 1;
3526
3527 vcpu->kvm->arch.wall_clock = data;
3528 kvm_write_wall_clock(vcpu->kvm, data, 0);
3529 break;
3530 case MSR_KVM_SYSTEM_TIME_NEW:
3531 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3532 return 1;
3533
3534 kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
3535 break;
3536 case MSR_KVM_SYSTEM_TIME:
3537 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3538 return 1;
3539
3540 kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
3541 break;
3542 case MSR_KVM_ASYNC_PF_EN:
3543 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3544 return 1;
3545
3546 if (kvm_pv_enable_async_pf(vcpu, data))
3547 return 1;
3548 break;
3549 case MSR_KVM_ASYNC_PF_INT:
3550 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3551 return 1;
3552
3553 if (kvm_pv_enable_async_pf_int(vcpu, data))
3554 return 1;
3555 break;
3556 case MSR_KVM_ASYNC_PF_ACK:
3557 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3558 return 1;
3559 if (data & 0x1) {
3560 vcpu->arch.apf.pageready_pending = false;
3561 kvm_check_async_pf_completion(vcpu);
3562 }
3563 break;
3564 case MSR_KVM_STEAL_TIME:
3565 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3566 return 1;
3567
3568 if (unlikely(!sched_info_on()))
3569 return 1;
3570
3571 if (data & KVM_STEAL_RESERVED_MASK)
3572 return 1;
3573
3574 vcpu->arch.st.msr_val = data;
3575
3576 if (!(data & KVM_MSR_ENABLED))
3577 break;
3578
3579 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3580
3581 break;
3582 case MSR_KVM_PV_EOI_EN:
3583 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3584 return 1;
3585
3586 if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
3587 return 1;
3588 break;
3589
3590 case MSR_KVM_POLL_CONTROL:
3591 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3592 return 1;
3593
3594
3595 if (data & (-1ULL << 1))
3596 return 1;
3597
3598 vcpu->arch.msr_kvm_poll_control = data;
3599 break;
3600
3601 case MSR_IA32_MCG_CTL:
3602 case MSR_IA32_MCG_STATUS:
3603 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3604 return set_msr_mce(vcpu, msr_info);
3605
3606 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3607 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3608 pr = true;
3609 fallthrough;
3610 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3611 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3612 if (kvm_pmu_is_valid_msr(vcpu, msr))
3613 return kvm_pmu_set_msr(vcpu, msr_info);
3614
3615 if (pr || data != 0)
3616 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3617 "0x%x data 0x%llx\n", msr, data);
3618 break;
3619 case MSR_K7_CLK_CTL:
3620
3621
3622
3623
3624
3625
3626
3627
3628 break;
3629 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3630 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3631 case HV_X64_MSR_SYNDBG_OPTIONS:
3632 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3633 case HV_X64_MSR_CRASH_CTL:
3634 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3635 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3636 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3637 case HV_X64_MSR_TSC_EMULATION_STATUS:
3638 return kvm_hv_set_msr_common(vcpu, msr, data,
3639 msr_info->host_initiated);
3640 case MSR_IA32_BBL_CR_CTL3:
3641
3642
3643
3644 if (report_ignored_msrs)
3645 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3646 msr, data);
3647 break;
3648 case MSR_AMD64_OSVW_ID_LENGTH:
3649 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3650 return 1;
3651 vcpu->arch.osvw.length = data;
3652 break;
3653 case MSR_AMD64_OSVW_STATUS:
3654 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3655 return 1;
3656 vcpu->arch.osvw.status = data;
3657 break;
3658 case MSR_PLATFORM_INFO:
3659 if (!msr_info->host_initiated ||
3660 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3661 cpuid_fault_enabled(vcpu)))
3662 return 1;
3663 vcpu->arch.msr_platform_info = data;
3664 break;
3665 case MSR_MISC_FEATURES_ENABLES:
3666 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3667 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3668 !supports_cpuid_fault(vcpu)))
3669 return 1;
3670 vcpu->arch.msr_misc_features_enables = data;
3671 break;
3672 default:
3673 if (kvm_pmu_is_valid_msr(vcpu, msr))
3674 return kvm_pmu_set_msr(vcpu, msr_info);
3675 return KVM_MSR_RET_INVALID;
3676 }
3677 return 0;
3678}
3679EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3680
3681static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3682{
3683 u64 data;
3684 u64 mcg_cap = vcpu->arch.mcg_cap;
3685 unsigned bank_num = mcg_cap & 0xff;
3686
3687 switch (msr) {
3688 case MSR_IA32_P5_MC_ADDR:
3689 case MSR_IA32_P5_MC_TYPE:
3690 data = 0;
3691 break;
3692 case MSR_IA32_MCG_CAP:
3693 data = vcpu->arch.mcg_cap;
3694 break;
3695 case MSR_IA32_MCG_CTL:
3696 if (!(mcg_cap & MCG_CTL_P) && !host)
3697 return 1;
3698 data = vcpu->arch.mcg_ctl;
3699 break;
3700 case MSR_IA32_MCG_STATUS:
3701 data = vcpu->arch.mcg_status;
3702 break;
3703 default:
3704 if (msr >= MSR_IA32_MC0_CTL &&
3705 msr < MSR_IA32_MCx_CTL(bank_num)) {
3706 u32 offset = array_index_nospec(
3707 msr - MSR_IA32_MC0_CTL,
3708 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3709
3710 data = vcpu->arch.mce_banks[offset];
3711 break;
3712 }
3713 return 1;
3714 }
3715 *pdata = data;
3716 return 0;
3717}
3718
3719int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3720{
3721 switch (msr_info->index) {
3722 case MSR_IA32_PLATFORM_ID:
3723 case MSR_IA32_EBL_CR_POWERON:
3724 case MSR_IA32_LASTBRANCHFROMIP:
3725 case MSR_IA32_LASTBRANCHTOIP:
3726 case MSR_IA32_LASTINTFROMIP:
3727 case MSR_IA32_LASTINTTOIP:
3728 case MSR_AMD64_SYSCFG:
3729 case MSR_K8_TSEG_ADDR:
3730 case MSR_K8_TSEG_MASK:
3731 case MSR_VM_HSAVE_PA:
3732 case MSR_K8_INT_PENDING_MSG:
3733 case MSR_AMD64_NB_CFG:
3734 case MSR_FAM10H_MMIO_CONF_BASE:
3735 case MSR_AMD64_BU_CFG2:
3736 case MSR_IA32_PERF_CTL:
3737 case MSR_AMD64_DC_CFG:
3738 case MSR_F15H_EX_CFG:
3739
3740
3741
3742
3743
3744
3745 case MSR_RAPL_POWER_UNIT:
3746 case MSR_PP0_ENERGY_STATUS:
3747 case MSR_PP1_ENERGY_STATUS:
3748 case MSR_PKG_ENERGY_STATUS:
3749 case MSR_DRAM_ENERGY_STATUS:
3750 msr_info->data = 0;
3751 break;
3752 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3753 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3754 return kvm_pmu_get_msr(vcpu, msr_info);
3755 if (!msr_info->host_initiated)
3756 return 1;
3757 msr_info->data = 0;
3758 break;
3759 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3760 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3761 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3762 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3763 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3764 return kvm_pmu_get_msr(vcpu, msr_info);
3765 msr_info->data = 0;
3766 break;
3767 case MSR_IA32_UCODE_REV:
3768 msr_info->data = vcpu->arch.microcode_version;
3769 break;
3770 case MSR_IA32_ARCH_CAPABILITIES:
3771 if (!msr_info->host_initiated &&
3772 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3773 return 1;
3774 msr_info->data = vcpu->arch.arch_capabilities;
3775 break;
3776 case MSR_IA32_PERF_CAPABILITIES:
3777 if (!msr_info->host_initiated &&
3778 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3779 return 1;
3780 msr_info->data = vcpu->arch.perf_capabilities;
3781 break;
3782 case MSR_IA32_POWER_CTL:
3783 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3784 break;
3785 case MSR_IA32_TSC: {
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795 u64 offset, ratio;
3796
3797 if (msr_info->host_initiated) {
3798 offset = vcpu->arch.l1_tsc_offset;
3799 ratio = vcpu->arch.l1_tsc_scaling_ratio;
3800 } else {
3801 offset = vcpu->arch.tsc_offset;
3802 ratio = vcpu->arch.tsc_scaling_ratio;
3803 }
3804
3805 msr_info->data = kvm_scale_tsc(vcpu, rdtsc(), ratio) + offset;
3806 break;
3807 }
3808 case MSR_MTRRcap:
3809 case 0x200 ... 0x2ff:
3810 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3811 case 0xcd:
3812 msr_info->data = 3;
3813 break;
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825 case MSR_EBC_FREQUENCY_ID:
3826 msr_info->data = 1 << 24;
3827 break;
3828 case MSR_IA32_APICBASE:
3829 msr_info->data = kvm_get_apic_base(vcpu);
3830 break;
3831 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3832 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3833 case MSR_IA32_TSC_DEADLINE:
3834 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3835 break;
3836 case MSR_IA32_TSC_ADJUST:
3837 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3838 break;
3839 case MSR_IA32_MISC_ENABLE:
3840 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3841 break;
3842 case MSR_IA32_SMBASE:
3843 if (!msr_info->host_initiated)
3844 return 1;
3845 msr_info->data = vcpu->arch.smbase;
3846 break;
3847 case MSR_SMI_COUNT:
3848 msr_info->data = vcpu->arch.smi_count;
3849 break;
3850 case MSR_IA32_PERF_STATUS:
3851
3852 msr_info->data = 1000ULL;
3853
3854 msr_info->data |= (((uint64_t)4ULL) << 40);
3855 break;
3856 case MSR_EFER:
3857 msr_info->data = vcpu->arch.efer;
3858 break;
3859 case MSR_KVM_WALL_CLOCK:
3860 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3861 return 1;
3862
3863 msr_info->data = vcpu->kvm->arch.wall_clock;
3864 break;
3865 case MSR_KVM_WALL_CLOCK_NEW:
3866 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3867 return 1;
3868
3869 msr_info->data = vcpu->kvm->arch.wall_clock;
3870 break;
3871 case MSR_KVM_SYSTEM_TIME:
3872 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3873 return 1;
3874
3875 msr_info->data = vcpu->arch.time;
3876 break;
3877 case MSR_KVM_SYSTEM_TIME_NEW:
3878 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3879 return 1;
3880
3881 msr_info->data = vcpu->arch.time;
3882 break;
3883 case MSR_KVM_ASYNC_PF_EN:
3884 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3885 return 1;
3886
3887 msr_info->data = vcpu->arch.apf.msr_en_val;
3888 break;
3889 case MSR_KVM_ASYNC_PF_INT:
3890 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3891 return 1;
3892
3893 msr_info->data = vcpu->arch.apf.msr_int_val;
3894 break;
3895 case MSR_KVM_ASYNC_PF_ACK:
3896 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3897 return 1;
3898
3899 msr_info->data = 0;
3900 break;
3901 case MSR_KVM_STEAL_TIME:
3902 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3903 return 1;
3904
3905 msr_info->data = vcpu->arch.st.msr_val;
3906 break;
3907 case MSR_KVM_PV_EOI_EN:
3908 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3909 return 1;
3910
3911 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3912 break;
3913 case MSR_KVM_POLL_CONTROL:
3914 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3915 return 1;
3916
3917 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3918 break;
3919 case MSR_IA32_P5_MC_ADDR:
3920 case MSR_IA32_P5_MC_TYPE:
3921 case MSR_IA32_MCG_CAP:
3922 case MSR_IA32_MCG_CTL:
3923 case MSR_IA32_MCG_STATUS:
3924 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3925 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3926 msr_info->host_initiated);
3927 case MSR_IA32_XSS:
3928 if (!msr_info->host_initiated &&
3929 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3930 return 1;
3931 msr_info->data = vcpu->arch.ia32_xss;
3932 break;
3933 case MSR_K7_CLK_CTL:
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943 msr_info->data = 0x20000000;
3944 break;
3945 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3946 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3947 case HV_X64_MSR_SYNDBG_OPTIONS:
3948 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3949 case HV_X64_MSR_CRASH_CTL:
3950 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3951 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3952 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3953 case HV_X64_MSR_TSC_EMULATION_STATUS:
3954 return kvm_hv_get_msr_common(vcpu,
3955 msr_info->index, &msr_info->data,
3956 msr_info->host_initiated);
3957 case MSR_IA32_BBL_CR_CTL3:
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968 msr_info->data = 0xbe702111;
3969 break;
3970 case MSR_AMD64_OSVW_ID_LENGTH:
3971 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3972 return 1;
3973 msr_info->data = vcpu->arch.osvw.length;
3974 break;
3975 case MSR_AMD64_OSVW_STATUS:
3976 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3977 return 1;
3978 msr_info->data = vcpu->arch.osvw.status;
3979 break;
3980 case MSR_PLATFORM_INFO:
3981 if (!msr_info->host_initiated &&
3982 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3983 return 1;
3984 msr_info->data = vcpu->arch.msr_platform_info;
3985 break;
3986 case MSR_MISC_FEATURES_ENABLES:
3987 msr_info->data = vcpu->arch.msr_misc_features_enables;
3988 break;
3989 case MSR_K7_HWCR:
3990 msr_info->data = vcpu->arch.msr_hwcr;
3991 break;
3992 default:
3993 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3994 return kvm_pmu_get_msr(vcpu, msr_info);
3995 return KVM_MSR_RET_INVALID;
3996 }
3997 return 0;
3998}
3999EXPORT_SYMBOL_GPL(kvm_get_msr_common);
4000
4001
4002
4003
4004
4005
4006static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
4007 struct kvm_msr_entry *entries,
4008 int (*do_msr)(struct kvm_vcpu *vcpu,
4009 unsigned index, u64 *data))
4010{
4011 int i;
4012
4013 for (i = 0; i < msrs->nmsrs; ++i)
4014 if (do_msr(vcpu, entries[i].index, &entries[i].data))
4015 break;
4016
4017 return i;
4018}
4019
4020
4021
4022
4023
4024
4025static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
4026 int (*do_msr)(struct kvm_vcpu *vcpu,
4027 unsigned index, u64 *data),
4028 int writeback)
4029{
4030 struct kvm_msrs msrs;
4031 struct kvm_msr_entry *entries;
4032 int r, n;
4033 unsigned size;
4034
4035 r = -EFAULT;
4036 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
4037 goto out;
4038
4039 r = -E2BIG;
4040 if (msrs.nmsrs >= MAX_IO_MSRS)
4041 goto out;
4042
4043 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
4044 entries = memdup_user(user_msrs->entries, size);
4045 if (IS_ERR(entries)) {
4046 r = PTR_ERR(entries);
4047 goto out;
4048 }
4049
4050 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
4051 if (r < 0)
4052 goto out_free;
4053
4054 r = -EFAULT;
4055 if (writeback && copy_to_user(user_msrs->entries, entries, size))
4056 goto out_free;
4057
4058 r = n;
4059
4060out_free:
4061 kfree(entries);
4062out:
4063 return r;
4064}
4065
4066static inline bool kvm_can_mwait_in_guest(void)
4067{
4068 return boot_cpu_has(X86_FEATURE_MWAIT) &&
4069 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
4070 boot_cpu_has(X86_FEATURE_ARAT);
4071}
4072
4073static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
4074 struct kvm_cpuid2 __user *cpuid_arg)
4075{
4076 struct kvm_cpuid2 cpuid;
4077 int r;
4078
4079 r = -EFAULT;
4080 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4081 return r;
4082
4083 r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4084 if (r)
4085 return r;
4086
4087 r = -EFAULT;
4088 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4089 return r;
4090
4091 return 0;
4092}
4093
4094int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
4095{
4096 int r = 0;
4097
4098 switch (ext) {
4099 case KVM_CAP_IRQCHIP:
4100 case KVM_CAP_HLT:
4101 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
4102 case KVM_CAP_SET_TSS_ADDR:
4103 case KVM_CAP_EXT_CPUID:
4104 case KVM_CAP_EXT_EMUL_CPUID:
4105 case KVM_CAP_CLOCKSOURCE:
4106 case KVM_CAP_PIT:
4107 case KVM_CAP_NOP_IO_DELAY:
4108 case KVM_CAP_MP_STATE:
4109 case KVM_CAP_SYNC_MMU:
4110 case KVM_CAP_USER_NMI:
4111 case KVM_CAP_REINJECT_CONTROL:
4112 case KVM_CAP_IRQ_INJECT_STATUS:
4113 case KVM_CAP_IOEVENTFD:
4114 case KVM_CAP_IOEVENTFD_NO_LENGTH:
4115 case KVM_CAP_PIT2:
4116 case KVM_CAP_PIT_STATE2:
4117 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
4118 case KVM_CAP_VCPU_EVENTS:
4119 case KVM_CAP_HYPERV:
4120 case KVM_CAP_HYPERV_VAPIC:
4121 case KVM_CAP_HYPERV_SPIN:
4122 case KVM_CAP_HYPERV_SYNIC:
4123 case KVM_CAP_HYPERV_SYNIC2:
4124 case KVM_CAP_HYPERV_VP_INDEX:
4125 case KVM_CAP_HYPERV_EVENTFD:
4126 case KVM_CAP_HYPERV_TLBFLUSH:
4127 case KVM_CAP_HYPERV_SEND_IPI:
4128 case KVM_CAP_HYPERV_CPUID:
4129 case KVM_CAP_HYPERV_ENFORCE_CPUID:
4130 case KVM_CAP_SYS_HYPERV_CPUID:
4131 case KVM_CAP_PCI_SEGMENT:
4132 case KVM_CAP_DEBUGREGS:
4133 case KVM_CAP_X86_ROBUST_SINGLESTEP:
4134 case KVM_CAP_XSAVE:
4135 case KVM_CAP_ASYNC_PF:
4136 case KVM_CAP_ASYNC_PF_INT:
4137 case KVM_CAP_GET_TSC_KHZ:
4138 case KVM_CAP_KVMCLOCK_CTRL:
4139 case KVM_CAP_READONLY_MEM:
4140 case KVM_CAP_HYPERV_TIME:
4141 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
4142 case KVM_CAP_TSC_DEADLINE_TIMER:
4143 case KVM_CAP_DISABLE_QUIRKS:
4144 case KVM_CAP_SET_BOOT_CPU_ID:
4145 case KVM_CAP_SPLIT_IRQCHIP:
4146 case KVM_CAP_IMMEDIATE_EXIT:
4147 case KVM_CAP_PMU_EVENT_FILTER:
4148 case KVM_CAP_GET_MSR_FEATURES:
4149 case KVM_CAP_MSR_PLATFORM_INFO:
4150 case KVM_CAP_EXCEPTION_PAYLOAD:
4151 case KVM_CAP_SET_GUEST_DEBUG:
4152 case KVM_CAP_LAST_CPU:
4153 case KVM_CAP_X86_USER_SPACE_MSR:
4154 case KVM_CAP_X86_MSR_FILTER:
4155 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4156#ifdef CONFIG_X86_SGX_KVM
4157 case KVM_CAP_SGX_ATTRIBUTE:
4158#endif
4159 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
4160 case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
4161 case KVM_CAP_SREGS2:
4162 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
4163 case KVM_CAP_VCPU_ATTRIBUTES:
4164 r = 1;
4165 break;
4166 case KVM_CAP_EXIT_HYPERCALL:
4167 r = KVM_EXIT_HYPERCALL_VALID_MASK;
4168 break;
4169 case KVM_CAP_SET_GUEST_DEBUG2:
4170 return KVM_GUESTDBG_VALID_MASK;
4171#ifdef CONFIG_KVM_XEN
4172 case KVM_CAP_XEN_HVM:
4173 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
4174 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
4175 KVM_XEN_HVM_CONFIG_SHARED_INFO;
4176 if (sched_info_on())
4177 r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
4178 break;
4179#endif
4180 case KVM_CAP_SYNC_REGS:
4181 r = KVM_SYNC_X86_VALID_FIELDS;
4182 break;
4183 case KVM_CAP_ADJUST_CLOCK:
4184 r = KVM_CLOCK_VALID_FLAGS;
4185 break;
4186 case KVM_CAP_X86_DISABLE_EXITS:
4187 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
4188 KVM_X86_DISABLE_EXITS_CSTATE;
4189		if (kvm_can_mwait_in_guest())
4190 r |= KVM_X86_DISABLE_EXITS_MWAIT;
4191 break;
4192 case KVM_CAP_X86_SMM:
4193
4194
4195
4196
4197
4198
4199
4200
4201 r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE);
4202 break;
4203 case KVM_CAP_VAPIC:
4204 r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
4205 break;
4206 case KVM_CAP_NR_VCPUS:
4207 r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
4208 break;
4209 case KVM_CAP_MAX_VCPUS:
4210 r = KVM_MAX_VCPUS;
4211 break;
4212 case KVM_CAP_MAX_VCPU_ID:
4213 r = KVM_MAX_VCPU_IDS;
4214 break;
4215 case KVM_CAP_PV_MMU:
4216 r = 0;
4217 break;
4218 case KVM_CAP_MCE:
4219 r = KVM_MAX_MCE_BANKS;
4220 break;
4221 case KVM_CAP_XCRS:
4222 r = boot_cpu_has(X86_FEATURE_XSAVE);
4223 break;
4224 case KVM_CAP_TSC_CONTROL:
4225 r = kvm_has_tsc_control;
4226 break;
4227 case KVM_CAP_X2APIC_API:
4228 r = KVM_X2APIC_API_VALID_FLAGS;
4229 break;
4230 case KVM_CAP_NESTED_STATE:
4231 r = kvm_x86_ops.nested_ops->get_state ?
4232 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
4233 break;
4234 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4235 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
4236 break;
4237 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4238 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
4239 break;
4240 case KVM_CAP_SMALLER_MAXPHYADDR:
4241 r = (int) allow_smaller_maxphyaddr;
4242 break;
4243 case KVM_CAP_STEAL_TIME:
4244 r = sched_info_on();
4245 break;
4246 case KVM_CAP_X86_BUS_LOCK_EXIT:
4247 if (kvm_has_bus_lock_exit)
4248 r = KVM_BUS_LOCK_DETECTION_OFF |
4249 KVM_BUS_LOCK_DETECTION_EXIT;
4250 else
4251 r = 0;
4252 break;
4253 default:
4254 break;
4255 }
4256 return r;
4257
4258}
4259
4260long kvm_arch_dev_ioctl(struct file *filp,
4261 unsigned int ioctl, unsigned long arg)
4262{
4263 void __user *argp = (void __user *)arg;
4264 long r;
4265
4266 switch (ioctl) {
4267 case KVM_GET_MSR_INDEX_LIST: {
4268 struct kvm_msr_list __user *user_msr_list = argp;
4269 struct kvm_msr_list msr_list;
4270 unsigned n;
4271
4272 r = -EFAULT;
4273 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4274 goto out;
4275 n = msr_list.nmsrs;
4276 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
4277 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4278 goto out;
4279 r = -E2BIG;
4280 if (n < msr_list.nmsrs)
4281 goto out;
4282 r = -EFAULT;
4283 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
4284 num_msrs_to_save * sizeof(u32)))
4285 goto out;
4286 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
4287 &emulated_msrs,
4288 num_emulated_msrs * sizeof(u32)))
4289 goto out;
4290 r = 0;
4291 break;
4292 }
4293 case KVM_GET_SUPPORTED_CPUID:
4294 case KVM_GET_EMULATED_CPUID: {
4295 struct kvm_cpuid2 __user *cpuid_arg = argp;
4296 struct kvm_cpuid2 cpuid;
4297
4298 r = -EFAULT;
4299 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4300 goto out;
4301
4302 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
4303 ioctl);
4304 if (r)
4305 goto out;
4306
4307 r = -EFAULT;
4308 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4309 goto out;
4310 r = 0;
4311 break;
4312 }
4313 case KVM_X86_GET_MCE_CAP_SUPPORTED:
4314 r = -EFAULT;
4315 if (copy_to_user(argp, &kvm_mce_cap_supported,
4316 sizeof(kvm_mce_cap_supported)))
4317 goto out;
4318 r = 0;
4319 break;
4320 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
4321 struct kvm_msr_list __user *user_msr_list = argp;
4322 struct kvm_msr_list msr_list;
4323 unsigned int n;
4324
4325 r = -EFAULT;
4326 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4327 goto out;
4328 n = msr_list.nmsrs;
4329 msr_list.nmsrs = num_msr_based_features;
4330 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4331 goto out;
4332 r = -E2BIG;
4333 if (n < msr_list.nmsrs)
4334 goto out;
4335 r = -EFAULT;
4336 if (copy_to_user(user_msr_list->indices, &msr_based_features,
4337 num_msr_based_features * sizeof(u32)))
4338 goto out;
4339 r = 0;
4340 break;
4341 }
4342 case KVM_GET_MSRS:
4343 r = msr_io(NULL, argp, do_get_msr_feature, 1);
4344 break;
4345 case KVM_GET_SUPPORTED_HV_CPUID:
4346 r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
4347 break;
4348 default:
4349 r = -EINVAL;
4350 break;
4351 }
4352out:
4353 return r;
4354}
4355
4356static void wbinvd_ipi(void *garbage)
4357{
4358 wbinvd();
4359}
4360
4361static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
4362{
4363 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
4364}
4365
4366void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4367{
4368
4369 if (need_emulate_wbinvd(vcpu)) {
4370 if (static_call(kvm_x86_has_wbinvd_exit)())
4371 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4372 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
4373 smp_call_function_single(vcpu->cpu,
4374 wbinvd_ipi, NULL, 1);
4375 }
4376
4377 static_call(kvm_x86_vcpu_load)(vcpu, cpu);
4378
4379
4380 vcpu->arch.host_pkru = read_pkru();
4381
4382
4383 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
4384 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
4385 vcpu->arch.tsc_offset_adjustment = 0;
4386 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4387 }
4388
4389 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
4390 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
4391 rdtsc() - vcpu->arch.last_host_tsc;
4392 if (tsc_delta < 0)
4393 mark_tsc_unstable("KVM discovered backwards TSC");
4394
4395 if (kvm_check_tsc_unstable()) {
4396 u64 offset = kvm_compute_l1_tsc_offset(vcpu,
4397 vcpu->arch.last_guest_tsc);
4398 kvm_vcpu_write_tsc_offset(vcpu, offset);
4399 vcpu->arch.tsc_catchup = 1;
4400 }
4401
4402 if (kvm_lapic_hv_timer_in_use(vcpu))
4403 kvm_lapic_restart_hv_timer(vcpu);
4404
4405
4406
4407
4408
4409 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
4410 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
4411 if (vcpu->cpu != cpu)
4412 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
4413 vcpu->cpu = cpu;
4414 }
4415
4416 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
4417}
4418
4419static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
4420{
4421 struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
4422 struct kvm_steal_time __user *st;
4423 struct kvm_memslots *slots;
4424 static const u8 preempted = KVM_VCPU_PREEMPTED;
4425
4426 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
4427 return;
4428
4429 if (vcpu->arch.st.preempted)
4430 return;
4431
4432
4433 if (unlikely(current->mm != vcpu->kvm->mm))
4434 return;
4435
4436 slots = kvm_memslots(vcpu->kvm);
4437
4438 if (unlikely(slots->generation != ghc->generation ||
4439 kvm_is_error_hva(ghc->hva) || !ghc->memslot))
4440 return;
4441
4442 st = (struct kvm_steal_time __user *)ghc->hva;
4443 BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
4444
4445 if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
4446 vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
4447
4448 mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
4449}
4450
4451void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
4452{
4453 int idx;
4454
4455 if (vcpu->preempted && !vcpu->arch.guest_state_protected)
4456 vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
4457
4458
4459
4460
4461
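/*
 * Take the srcu lock as memslots will be accessed to check the gfn
 * cache generation against the memslots generation.
 */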
4462 idx = srcu_read_lock(&vcpu->kvm->srcu);
4463 if (kvm_xen_msr_enabled(vcpu->kvm))
4464 kvm_xen_runstate_set_preempted(vcpu);
4465 else
4466 kvm_steal_time_set_preempted(vcpu);
4467 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4468
4469 static_call(kvm_x86_vcpu_put)(vcpu);
4470 vcpu->arch.last_host_tsc = rdtsc();
4471}
4472
4473static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
4474 struct kvm_lapic_state *s)
4475{
4476 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
4477
4478 return kvm_apic_get_state(vcpu, s);
4479}
4480
4481static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
4482 struct kvm_lapic_state *s)
4483{
4484 int r;
4485
4486 r = kvm_apic_set_state(vcpu, s);
4487 if (r)
4488 return r;
4489 update_cr8_intercept(vcpu);
4490
4491 return 0;
4492}
4493
4494static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
4495{
4496
4497
4498
4499
4500
4501
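/*
 * Userspace interrupts can only be accepted if there is somewhere to put
 * them: refuse while an ExtINT is already pending, and otherwise accept
 * only if the LAPIC is in userspace or the in-kernel LAPIC will take a
 * PIC (ExtINT) interrupt.
 */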
4502 if (kvm_cpu_has_extint(vcpu))
4503 return false;
4504
4505
4506 return (!lapic_in_kernel(vcpu) ||
4507 kvm_apic_accept_pic_intr(vcpu));
4508}
4509
4510static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
4511{
4512
4513
4514
4515
4516
4517
4518
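/*
 * Only report readiness when an interrupt could actually be injected
 * right now: interrupts are allowed, userspace's interrupt can be
 * accepted, nothing is waiting to be re-injected, and no exception is
 * pending.
 */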
4519 return (kvm_arch_interrupt_allowed(vcpu) &&
4520 kvm_cpu_accept_dm_intr(vcpu) &&
4521 !kvm_event_needs_reinjection(vcpu) &&
4522 !vcpu->arch.exception.pending);
4523}
4524
4525static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
4526 struct kvm_interrupt *irq)
4527{
4528 if (irq->irq >= KVM_NR_INTERRUPTS)
4529 return -EINVAL;
4530
4531 if (!irqchip_in_kernel(vcpu->kvm)) {
4532 kvm_queue_interrupt(vcpu, irq->irq, false);
4533 kvm_make_request(KVM_REQ_EVENT, vcpu);
4534 return 0;
4535 }
4536
4537
4538
4539
4540
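/*
 * With an in-kernel LAPIC, KVM_INTERRUPT is only used to deliver ExtINT;
 * that is the in-kernel PIC's job if one exists, so fail in that case.
 */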
4541 if (pic_in_kernel(vcpu->kvm))
4542 return -ENXIO;
4543
4544 if (vcpu->arch.pending_external_vector != -1)
4545 return -EEXIST;
4546
4547 vcpu->arch.pending_external_vector = irq->irq;
4548 kvm_make_request(KVM_REQ_EVENT, vcpu);
4549 return 0;
4550}
4551
4552static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
4553{
4554 kvm_inject_nmi(vcpu);
4555
4556 return 0;
4557}
4558
4559static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
4560{
4561 kvm_make_request(KVM_REQ_SMI, vcpu);
4562
4563 return 0;
4564}
4565
4566static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
4567 struct kvm_tpr_access_ctl *tac)
4568{
4569 if (tac->flags)
4570 return -EINVAL;
4571 vcpu->arch.tpr_access_reporting = !!tac->enabled;
4572 return 0;
4573}
4574
4575static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
4576 u64 mcg_cap)
4577{
4578 int r;
4579 unsigned bank_num = mcg_cap & 0xff, bank;
4580
4581 r = -EINVAL;
4582 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
4583 goto out;
4584 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
4585 goto out;
4586 r = 0;
4587 vcpu->arch.mcg_cap = mcg_cap;
4588
4589 if (mcg_cap & MCG_CTL_P)
4590 vcpu->arch.mcg_ctl = ~(u64)0;
4591
4592 for (bank = 0; bank < bank_num; bank++)
4593 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
4594
4595 static_call(kvm_x86_setup_mce)(vcpu);
4596out:
4597 return r;
4598}
4599
4600static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
4601 struct kvm_x86_mce *mce)
4602{
4603 u64 mcg_cap = vcpu->arch.mcg_cap;
4604 unsigned bank_num = mcg_cap & 0xff;
4605 u64 *banks = vcpu->arch.mce_banks;
4606
4607 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
4608 return -EINVAL;
4609
4610
4611
4612
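/*
 * Uncorrected errors are only injected if IA32_MCG_CTL (when present) is
 * programmed to all ones; otherwise they are silently dropped.
 */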
4613 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
4614 vcpu->arch.mcg_ctl != ~(u64)0)
4615 return 0;
4616 banks += 4 * mce->bank;
4617
4618
4619
4620
4621 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
4622 return 0;
4623 if (mce->status & MCI_STATUS_UC) {
4624 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
4625 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
4626 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
4627 return 0;
4628 }
4629 if (banks[1] & MCI_STATUS_VAL)
4630 mce->status |= MCI_STATUS_OVER;
4631 banks[2] = mce->addr;
4632 banks[3] = mce->misc;
4633 vcpu->arch.mcg_status = mce->mcg_status;
4634 banks[1] = mce->status;
4635 kvm_queue_exception(vcpu, MC_VECTOR);
4636 } else if (!(banks[1] & MCI_STATUS_VAL)
4637 || !(banks[1] & MCI_STATUS_UC)) {
4638 if (banks[1] & MCI_STATUS_VAL)
4639 mce->status |= MCI_STATUS_OVER;
4640 banks[2] = mce->addr;
4641 banks[3] = mce->misc;
4642 banks[1] = mce->status;
4643 } else
4644 banks[1] |= MCI_STATUS_OVER;
4645 return 0;
4646}
4647
4648static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
4649 struct kvm_vcpu_events *events)
4650{
4651 process_nmi(vcpu);
4652
4653 if (kvm_check_request(KVM_REQ_SMI, vcpu))
4654 process_smi(vcpu);
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
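/*
 * If userspace has not enabled KVM_CAP_EXCEPTION_PAYLOAD, deliver any
 * pending exception payload (e.g. the #PF faulting address or #DB DR6
 * bits) into the architectural registers now, since the legacy ABI has
 * no field to carry the payload separately.
 */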
4667 if (!vcpu->kvm->arch.exception_payload_enabled &&
4668 vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
4669 kvm_deliver_exception_payload(vcpu);
4670
4671
4672
4673
4674
4675
4676
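/*
 * The ABI cannot convey the instruction length of a software exception,
 * so do not report them; because RIP was not advanced, the exception
 * will simply be raised again.
 */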
4677 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
4678 events->exception.injected = 0;
4679 events->exception.pending = 0;
4680 } else {
4681 events->exception.injected = vcpu->arch.exception.injected;
4682 events->exception.pending = vcpu->arch.exception.pending;
4683
4684
4685
4686
4687
4688 if (!vcpu->kvm->arch.exception_payload_enabled)
4689 events->exception.injected |=
4690 vcpu->arch.exception.pending;
4691 }
4692 events->exception.nr = vcpu->arch.exception.nr;
4693 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
4694 events->exception.error_code = vcpu->arch.exception.error_code;
4695 events->exception_has_payload = vcpu->arch.exception.has_payload;
4696 events->exception_payload = vcpu->arch.exception.payload;
4697
4698 events->interrupt.injected =
4699 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
4700 events->interrupt.nr = vcpu->arch.interrupt.nr;
4701 events->interrupt.soft = 0;
4702 events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
4703
4704 events->nmi.injected = vcpu->arch.nmi_injected;
4705 events->nmi.pending = vcpu->arch.nmi_pending != 0;
4706 events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
4707 events->nmi.pad = 0;
4708
4709 events->sipi_vector = 0;
4710
4711 events->smi.smm = is_smm(vcpu);
4712 events->smi.pending = vcpu->arch.smi_pending;
4713 events->smi.smm_inside_nmi =
4714 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
4715 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
4716
4717 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
4718 | KVM_VCPUEVENT_VALID_SHADOW
4719 | KVM_VCPUEVENT_VALID_SMM);
4720 if (vcpu->kvm->arch.exception_payload_enabled)
4721 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
4722
4723 memset(&events->reserved, 0, sizeof(events->reserved));
4724}
4725
4726static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
4727
4728static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
4729 struct kvm_vcpu_events *events)
4730{
4731 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
4732 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
4733 | KVM_VCPUEVENT_VALID_SHADOW
4734 | KVM_VCPUEVENT_VALID_SMM
4735 | KVM_VCPUEVENT_VALID_PAYLOAD))
4736 return -EINVAL;
4737
4738 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
4739 if (!vcpu->kvm->arch.exception_payload_enabled)
4740 return -EINVAL;
4741 if (events->exception.pending)
4742 events->exception.injected = 0;
4743 else
4744 events->exception_has_payload = 0;
4745 } else {
4746 events->exception.pending = 0;
4747 events->exception_has_payload = 0;
4748 }
4749
4750 if ((events->exception.injected || events->exception.pending) &&
4751 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
4752 return -EINVAL;
4753
4754
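/* INITs are latched while in SMM */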
4755 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
4756 (events->smi.smm || events->smi.pending) &&
4757 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4758 return -EINVAL;
4759
4760 process_nmi(vcpu);
4761 vcpu->arch.exception.injected = events->exception.injected;
4762 vcpu->arch.exception.pending = events->exception.pending;
4763 vcpu->arch.exception.nr = events->exception.nr;
4764 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
4765 vcpu->arch.exception.error_code = events->exception.error_code;
4766 vcpu->arch.exception.has_payload = events->exception_has_payload;
4767 vcpu->arch.exception.payload = events->exception_payload;
4768
4769 vcpu->arch.interrupt.injected = events->interrupt.injected;
4770 vcpu->arch.interrupt.nr = events->interrupt.nr;
4771 vcpu->arch.interrupt.soft = events->interrupt.soft;
4772 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
4773 static_call(kvm_x86_set_interrupt_shadow)(vcpu,
4774 events->interrupt.shadow);
4775
4776 vcpu->arch.nmi_injected = events->nmi.injected;
4777 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
4778 vcpu->arch.nmi_pending = events->nmi.pending;
4779 static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
4780
4781 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
4782 lapic_in_kernel(vcpu))
4783 vcpu->arch.apic->sipi_vector = events->sipi_vector;
4784
4785 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
4786 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
4787 kvm_smm_changed(vcpu, events->smi.smm);
4788
4789 vcpu->arch.smi_pending = events->smi.pending;
4790
4791 if (events->smi.smm) {
4792 if (events->smi.smm_inside_nmi)
4793 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
4794 else
4795 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
4796 }
4797
4798 if (lapic_in_kernel(vcpu)) {
4799 if (events->smi.latched_init)
4800 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4801 else
4802 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4803 }
4804 }
4805
4806 kvm_make_request(KVM_REQ_EVENT, vcpu);
4807
4808 return 0;
4809}
4810
4811static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
4812 struct kvm_debugregs *dbgregs)
4813{
4814 unsigned long val;
4815
4816 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
4817 kvm_get_dr(vcpu, 6, &val);
4818 dbgregs->dr6 = val;
4819 dbgregs->dr7 = vcpu->arch.dr7;
4820 dbgregs->flags = 0;
4821 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
4822}
4823
4824static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
4825 struct kvm_debugregs *dbgregs)
4826{
4827 if (dbgregs->flags)
4828 return -EINVAL;
4829
4830 if (!kvm_dr6_valid(dbgregs->dr6))
4831 return -EINVAL;
4832 if (!kvm_dr7_valid(dbgregs->dr7))
4833 return -EINVAL;
4834
4835 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
4836 kvm_update_dr0123(vcpu);
4837 vcpu->arch.dr6 = dbgregs->dr6;
4838 vcpu->arch.dr7 = dbgregs->dr7;
4839 kvm_update_dr7(vcpu);
4840
4841 return 0;
4842}
4843
4844static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
4845 struct kvm_xsave *guest_xsave)
4846{
4847 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
4848 return;
4849
4850 fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
4851 guest_xsave->region,
4852 sizeof(guest_xsave->region),
4853 vcpu->arch.pkru);
4854}
4855
4856static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
4857 struct kvm_xsave *guest_xsave)
4858{
4859 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
4860 return 0;
4861
4862 return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
4863 guest_xsave->region,
4864 supported_xcr0, &vcpu->arch.pkru);
4865}
4866
4867static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
4868 struct kvm_xcrs *guest_xcrs)
4869{
4870 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
4871 guest_xcrs->nr_xcrs = 0;
4872 return;
4873 }
4874
4875 guest_xcrs->nr_xcrs = 1;
4876 guest_xcrs->flags = 0;
4877 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
4878 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
4879}
4880
4881static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
4882 struct kvm_xcrs *guest_xcrs)
4883{
4884 int i, r = 0;
4885
4886 if (!boot_cpu_has(X86_FEATURE_XSAVE))
4887 return -EINVAL;
4888
4889 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
4890 return -EINVAL;
4891
4892 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
4893
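/* Only XCR0 (XCR_XFEATURE_ENABLED_MASK) is supported. */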
4894 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
4895 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
4896 guest_xcrs->xcrs[i].value);
4897 break;
4898 }
4899 if (r)
4900 r = -EINVAL;
4901 return r;
4902}
4903
4904
4905
4906
4907
4908
4909
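/*
 * kvm_set_guest_paused() tells the guest kernel, through its pvclock
 * area, that it has been stopped by the hypervisor; returns -EINVAL if
 * the guest has not registered a pvclock area.
 */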
4910static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
4911{
4912 if (!vcpu->arch.pv_time_enabled)
4913 return -EINVAL;
4914 vcpu->arch.pvclock_set_guest_stopped_request = true;
4915 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4916 return 0;
4917}
4918
4919static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
4920 struct kvm_device_attr *attr)
4921{
4922 int r;
4923
4924 switch (attr->attr) {
4925 case KVM_VCPU_TSC_OFFSET:
4926 r = 0;
4927 break;
4928 default:
4929 r = -ENXIO;
4930 }
4931
4932 return r;
4933}
4934
4935static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
4936 struct kvm_device_attr *attr)
4937{
4938 u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
4939 int r;
4940
4941 if ((u64)(unsigned long)uaddr != attr->addr)
4942 return -EFAULT;
4943
4944 switch (attr->attr) {
4945 case KVM_VCPU_TSC_OFFSET:
4946 r = -EFAULT;
4947 if (put_user(vcpu->arch.l1_tsc_offset, uaddr))
4948 break;
4949 r = 0;
4950 break;
4951 default:
4952 r = -ENXIO;
4953 }
4954
4955 return r;
4956}
4957
4958static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
4959 struct kvm_device_attr *attr)
4960{
4961 u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr;
4962 struct kvm *kvm = vcpu->kvm;
4963 int r;
4964
4965 if ((u64)(unsigned long)uaddr != attr->addr)
4966 return -EFAULT;
4967
4968 switch (attr->attr) {
4969 case KVM_VCPU_TSC_OFFSET: {
4970 u64 offset, tsc, ns;
4971 unsigned long flags;
4972 bool matched;
4973
4974 r = -EFAULT;
4975 if (get_user(offset, uaddr))
4976 break;
4977
4978 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
4979
4980 matched = (vcpu->arch.virtual_tsc_khz &&
4981 kvm->arch.last_tsc_khz == vcpu->arch.virtual_tsc_khz &&
4982 kvm->arch.last_tsc_offset == offset);
4983
4984 tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset;
4985 ns = get_kvmclock_base_ns();
4986
4987 __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched);
4988 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
4989
4990 r = 0;
4991 break;
4992 }
4993 default:
4994 r = -ENXIO;
4995 }
4996
4997 return r;
4998}
4999
5000static int kvm_vcpu_ioctl_device_attr(struct kvm_vcpu *vcpu,
5001 unsigned int ioctl,
5002 void __user *argp)
5003{
5004 struct kvm_device_attr attr;
5005 int r;
5006
5007 if (copy_from_user(&attr, argp, sizeof(attr)))
5008 return -EFAULT;
5009
5010 if (attr.group != KVM_VCPU_TSC_CTRL)
5011 return -ENXIO;
5012
5013 switch (ioctl) {
5014 case KVM_HAS_DEVICE_ATTR:
5015 r = kvm_arch_tsc_has_attr(vcpu, &attr);
5016 break;
5017 case KVM_GET_DEVICE_ATTR:
5018 r = kvm_arch_tsc_get_attr(vcpu, &attr);
5019 break;
5020 case KVM_SET_DEVICE_ATTR:
5021 r = kvm_arch_tsc_set_attr(vcpu, &attr);
5022 break;
5023 }
5024
5025 return r;
5026}
5027
5028static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5029 struct kvm_enable_cap *cap)
5030{
5031 int r;
5032 uint16_t vmcs_version;
5033 void __user *user_ptr;
5034
5035 if (cap->flags)
5036 return -EINVAL;
5037
5038 switch (cap->cap) {
5039 case KVM_CAP_HYPERV_SYNIC2:
5040 if (cap->args[0])
5041 return -EINVAL;
5042 fallthrough;
5043
5044 case KVM_CAP_HYPERV_SYNIC:
5045 if (!irqchip_in_kernel(vcpu->kvm))
5046 return -EINVAL;
5047 return kvm_hv_activate_synic(vcpu, cap->cap ==
5048 KVM_CAP_HYPERV_SYNIC2);
5049 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
5050 if (!kvm_x86_ops.nested_ops->enable_evmcs)
5051 return -ENOTTY;
5052 r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
5053 if (!r) {
5054 user_ptr = (void __user *)(uintptr_t)cap->args[0];
5055 if (copy_to_user(user_ptr, &vmcs_version,
5056 sizeof(vmcs_version)))
5057 r = -EFAULT;
5058 }
5059 return r;
5060 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
5061 if (!kvm_x86_ops.enable_direct_tlbflush)
5062 return -ENOTTY;
5063
5064 return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
5065
5066 case KVM_CAP_HYPERV_ENFORCE_CPUID:
5067 return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
5068
5069 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
5070 vcpu->arch.pv_cpuid.enforce = cap->args[0];
5071 if (vcpu->arch.pv_cpuid.enforce)
5072 kvm_update_pv_runtime(vcpu);
5073
5074 return 0;
5075 default:
5076 return -EINVAL;
5077 }
5078}
5079
5080long kvm_arch_vcpu_ioctl(struct file *filp,
5081 unsigned int ioctl, unsigned long arg)
5082{
5083 struct kvm_vcpu *vcpu = filp->private_data;
5084 void __user *argp = (void __user *)arg;
5085 int r;
5086 union {
5087 struct kvm_sregs2 *sregs2;
5088 struct kvm_lapic_state *lapic;
5089 struct kvm_xsave *xsave;
5090 struct kvm_xcrs *xcrs;
5091 void *buffer;
5092 } u;
5093
5094 vcpu_load(vcpu);
5095
5096 u.buffer = NULL;
5097 switch (ioctl) {
5098 case KVM_GET_LAPIC: {
5099 r = -EINVAL;
5100 if (!lapic_in_kernel(vcpu))
5101 goto out;
5102 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
5103 GFP_KERNEL_ACCOUNT);
5104
5105 r = -ENOMEM;
5106 if (!u.lapic)
5107 goto out;
5108 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
5109 if (r)
5110 goto out;
5111 r = -EFAULT;
5112 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
5113 goto out;
5114 r = 0;
5115 break;
5116 }
5117 case KVM_SET_LAPIC: {
5118 r = -EINVAL;
5119 if (!lapic_in_kernel(vcpu))
5120 goto out;
5121 u.lapic = memdup_user(argp, sizeof(*u.lapic));
5122 if (IS_ERR(u.lapic)) {
5123 r = PTR_ERR(u.lapic);
5124 goto out_nofree;
5125 }
5126
5127 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
5128 break;
5129 }
5130 case KVM_INTERRUPT: {
5131 struct kvm_interrupt irq;
5132
5133 r = -EFAULT;
5134 if (copy_from_user(&irq, argp, sizeof(irq)))
5135 goto out;
5136 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
5137 break;
5138 }
5139 case KVM_NMI: {
5140 r = kvm_vcpu_ioctl_nmi(vcpu);
5141 break;
5142 }
5143 case KVM_SMI: {
5144 r = kvm_vcpu_ioctl_smi(vcpu);
5145 break;
5146 }
5147 case KVM_SET_CPUID: {
5148 struct kvm_cpuid __user *cpuid_arg = argp;
5149 struct kvm_cpuid cpuid;
5150
5151
5152
5153
5154
5155
5156
5157
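/*
 * Changing guest CPUID after the vCPU has run is not reliably handled
 * (properties derived from CPUID are baked into cached MMU state), so
 * reject KVM_SET_CPUID once the vCPU has entered the guest.
 */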
5158 r = -EINVAL;
5159 if (vcpu->arch.last_vmentry_cpu != -1)
5160 goto out;
5161
5162 r = -EFAULT;
5163 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5164 goto out;
5165 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
5166 break;
5167 }
5168 case KVM_SET_CPUID2: {
5169 struct kvm_cpuid2 __user *cpuid_arg = argp;
5170 struct kvm_cpuid2 cpuid;
5171
5172
5173
5174
5175
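/* As with KVM_SET_CPUID above, disallow changes after the vCPU has run. */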
5176 r = -EINVAL;
5177 if (vcpu->arch.last_vmentry_cpu != -1)
5178 goto out;
5179
5180 r = -EFAULT;
5181 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5182 goto out;
5183 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
5184 cpuid_arg->entries);
5185 break;
5186 }
5187 case KVM_GET_CPUID2: {
5188 struct kvm_cpuid2 __user *cpuid_arg = argp;
5189 struct kvm_cpuid2 cpuid;
5190
5191 r = -EFAULT;
5192 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5193 goto out;
5194 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
5195 cpuid_arg->entries);
5196 if (r)
5197 goto out;
5198 r = -EFAULT;
5199 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
5200 goto out;
5201 r = 0;
5202 break;
5203 }
5204 case KVM_GET_MSRS: {
5205 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5206 r = msr_io(vcpu, argp, do_get_msr, 1);
5207 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5208 break;
5209 }
5210 case KVM_SET_MSRS: {
5211 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5212 r = msr_io(vcpu, argp, do_set_msr, 0);
5213 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5214 break;
5215 }
5216 case KVM_TPR_ACCESS_REPORTING: {
5217 struct kvm_tpr_access_ctl tac;
5218
5219 r = -EFAULT;
5220 if (copy_from_user(&tac, argp, sizeof(tac)))
5221 goto out;
5222 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
5223 if (r)
5224 goto out;
5225 r = -EFAULT;
5226 if (copy_to_user(argp, &tac, sizeof(tac)))
5227 goto out;
5228 r = 0;
5229 break;
5230 }
5231 case KVM_SET_VAPIC_ADDR: {
5232 struct kvm_vapic_addr va;
5233 int idx;
5234
5235 r = -EINVAL;
5236 if (!lapic_in_kernel(vcpu))
5237 goto out;
5238 r = -EFAULT;
5239 if (copy_from_user(&va, argp, sizeof(va)))
5240 goto out;
5241 idx = srcu_read_lock(&vcpu->kvm->srcu);
5242 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
5243 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5244 break;
5245 }
5246 case KVM_X86_SETUP_MCE: {
5247 u64 mcg_cap;
5248
5249 r = -EFAULT;
5250 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
5251 goto out;
5252 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
5253 break;
5254 }
5255 case KVM_X86_SET_MCE: {
5256 struct kvm_x86_mce mce;
5257
5258 r = -EFAULT;
5259 if (copy_from_user(&mce, argp, sizeof(mce)))
5260 goto out;
5261 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
5262 break;
5263 }
5264 case KVM_GET_VCPU_EVENTS: {
5265 struct kvm_vcpu_events events;
5266
5267 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
5268
5269 r = -EFAULT;
5270 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
5271 break;
5272 r = 0;
5273 break;
5274 }
5275 case KVM_SET_VCPU_EVENTS: {
5276 struct kvm_vcpu_events events;
5277
5278 r = -EFAULT;
5279 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
5280 break;
5281
5282 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
5283 break;
5284 }
5285 case KVM_GET_DEBUGREGS: {
5286 struct kvm_debugregs dbgregs;
5287
5288 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
5289
5290 r = -EFAULT;
5291 if (copy_to_user(argp, &dbgregs,
5292 sizeof(struct kvm_debugregs)))
5293 break;
5294 r = 0;
5295 break;
5296 }
5297 case KVM_SET_DEBUGREGS: {
5298 struct kvm_debugregs dbgregs;
5299
5300 r = -EFAULT;
5301 if (copy_from_user(&dbgregs, argp,
5302 sizeof(struct kvm_debugregs)))
5303 break;
5304
5305 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
5306 break;
5307 }
5308 case KVM_GET_XSAVE: {
5309 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
5310 r = -ENOMEM;
5311 if (!u.xsave)
5312 break;
5313
5314 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
5315
5316 r = -EFAULT;
5317 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
5318 break;
5319 r = 0;
5320 break;
5321 }
5322 case KVM_SET_XSAVE: {
5323 u.xsave = memdup_user(argp, sizeof(*u.xsave));
5324 if (IS_ERR(u.xsave)) {
5325 r = PTR_ERR(u.xsave);
5326 goto out_nofree;
5327 }
5328
5329 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
5330 break;
5331 }
5332 case KVM_GET_XCRS: {
5333 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
5334 r = -ENOMEM;
5335 if (!u.xcrs)
5336 break;
5337
5338 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
5339
5340 r = -EFAULT;
5341 if (copy_to_user(argp, u.xcrs,
5342 sizeof(struct kvm_xcrs)))
5343 break;
5344 r = 0;
5345 break;
5346 }
5347 case KVM_SET_XCRS: {
5348 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
5349 if (IS_ERR(u.xcrs)) {
5350 r = PTR_ERR(u.xcrs);
5351 goto out_nofree;
5352 }
5353
5354 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
5355 break;
5356 }
5357 case KVM_SET_TSC_KHZ: {
5358 u32 user_tsc_khz;
5359
5360 r = -EINVAL;
5361 user_tsc_khz = (u32)arg;
5362
5363 if (kvm_has_tsc_control &&
5364 user_tsc_khz >= kvm_max_guest_tsc_khz)
5365 goto out;
5366
5367 if (user_tsc_khz == 0)
5368 user_tsc_khz = tsc_khz;
5369
5370 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
5371 r = 0;
5372
5373 goto out;
5374 }
5375 case KVM_GET_TSC_KHZ: {
5376 r = vcpu->arch.virtual_tsc_khz;
5377 goto out;
5378 }
5379 case KVM_KVMCLOCK_CTRL: {
5380 r = kvm_set_guest_paused(vcpu);
5381 goto out;
5382 }
5383 case KVM_ENABLE_CAP: {
5384 struct kvm_enable_cap cap;
5385
5386 r = -EFAULT;
5387 if (copy_from_user(&cap, argp, sizeof(cap)))
5388 goto out;
5389 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5390 break;
5391 }
5392 case KVM_GET_NESTED_STATE: {
5393 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5394 u32 user_data_size;
5395
5396 r = -EINVAL;
5397 if (!kvm_x86_ops.nested_ops->get_state)
5398 break;
5399
5400 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
5401 r = -EFAULT;
5402 if (get_user(user_data_size, &user_kvm_nested_state->size))
5403 break;
5404
5405 r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
5406 user_data_size);
5407 if (r < 0)
5408 break;
5409
5410 if (r > user_data_size) {
5411 if (put_user(r, &user_kvm_nested_state->size))
5412 r = -EFAULT;
5413 else
5414 r = -E2BIG;
5415 break;
5416 }
5417
5418 r = 0;
5419 break;
5420 }
5421 case KVM_SET_NESTED_STATE: {
5422 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5423 struct kvm_nested_state kvm_state;
5424 int idx;
5425
5426 r = -EINVAL;
5427 if (!kvm_x86_ops.nested_ops->set_state)
5428 break;
5429
5430 r = -EFAULT;
5431 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
5432 break;
5433
5434 r = -EINVAL;
5435 if (kvm_state.size < sizeof(kvm_state))
5436 break;
5437
5438 if (kvm_state.flags &
5439 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
5440 | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
5441 | KVM_STATE_NESTED_GIF_SET))
5442 break;
5443
5444
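/* KVM_STATE_NESTED_RUN_PENDING is only meaningful together with GUEST_MODE. */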
5445 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
5446 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
5447 break;
5448
5449 idx = srcu_read_lock(&vcpu->kvm->srcu);
5450 r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
5451 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5452 break;
5453 }
5454 case KVM_GET_SUPPORTED_HV_CPUID:
5455 r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
5456 break;
5457#ifdef CONFIG_KVM_XEN
5458 case KVM_XEN_VCPU_GET_ATTR: {
5459 struct kvm_xen_vcpu_attr xva;
5460
5461 r = -EFAULT;
5462 if (copy_from_user(&xva, argp, sizeof(xva)))
5463 goto out;
5464 r = kvm_xen_vcpu_get_attr(vcpu, &xva);
5465 if (!r && copy_to_user(argp, &xva, sizeof(xva)))
5466 r = -EFAULT;
5467 break;
5468 }
5469 case KVM_XEN_VCPU_SET_ATTR: {
5470 struct kvm_xen_vcpu_attr xva;
5471
5472 r = -EFAULT;
5473 if (copy_from_user(&xva, argp, sizeof(xva)))
5474 goto out;
5475 r = kvm_xen_vcpu_set_attr(vcpu, &xva);
5476 break;
5477 }
5478#endif
5479 case KVM_GET_SREGS2: {
5480 u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
5481 r = -ENOMEM;
5482 if (!u.sregs2)
5483 goto out;
5484 __get_sregs2(vcpu, u.sregs2);
5485 r = -EFAULT;
5486 if (copy_to_user(argp, u.sregs2, sizeof(struct kvm_sregs2)))
5487 goto out;
5488 r = 0;
5489 break;
5490 }
5491 case KVM_SET_SREGS2: {
5492 u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
5493 if (IS_ERR(u.sregs2)) {
5494 r = PTR_ERR(u.sregs2);
5495 u.sregs2 = NULL;
5496 goto out;
5497 }
5498 r = __set_sregs2(vcpu, u.sregs2);
5499 break;
5500 }
5501 case KVM_HAS_DEVICE_ATTR:
5502 case KVM_GET_DEVICE_ATTR:
5503 case KVM_SET_DEVICE_ATTR:
5504 r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
5505 break;
5506 default:
5507 r = -EINVAL;
5508 }
5509out:
5510 kfree(u.buffer);
5511out_nofree:
5512 vcpu_put(vcpu);
5513 return r;
5514}
5515
5516vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5517{
5518 return VM_FAULT_SIGBUS;
5519}
5520
5521static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
5522{
5523 int ret;
5524
5525 if (addr > (unsigned int)(-3 * PAGE_SIZE))
5526 return -EINVAL;
5527 ret = static_call(kvm_x86_set_tss_addr)(kvm, addr);
5528 return ret;
5529}
5530
5531static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
5532 u64 ident_addr)
5533{
5534 return static_call(kvm_x86_set_identity_map_addr)(kvm, ident_addr);
5535}
5536
5537static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
5538 unsigned long kvm_nr_mmu_pages)
5539{
5540 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
5541 return -EINVAL;
5542
5543 mutex_lock(&kvm->slots_lock);
5544
5545 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
5546 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
5547
5548 mutex_unlock(&kvm->slots_lock);
5549 return 0;
5550}
5551
5552static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
5553{
5554 return kvm->arch.n_max_mmu_pages;
5555}
5556
5557static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5558{
5559 struct kvm_pic *pic = kvm->arch.vpic;
5560 int r;
5561
5562 r = 0;
5563 switch (chip->chip_id) {
5564 case KVM_IRQCHIP_PIC_MASTER:
5565 memcpy(&chip->chip.pic, &pic->pics[0],
5566 sizeof(struct kvm_pic_state));
5567 break;
5568 case KVM_IRQCHIP_PIC_SLAVE:
5569 memcpy(&chip->chip.pic, &pic->pics[1],
5570 sizeof(struct kvm_pic_state));
5571 break;
5572 case KVM_IRQCHIP_IOAPIC:
5573 kvm_get_ioapic(kvm, &chip->chip.ioapic);
5574 break;
5575 default:
5576 r = -EINVAL;
5577 break;
5578 }
5579 return r;
5580}
5581
5582static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5583{
5584 struct kvm_pic *pic = kvm->arch.vpic;
5585 int r;
5586
5587 r = 0;
5588 switch (chip->chip_id) {
5589 case KVM_IRQCHIP_PIC_MASTER:
5590 spin_lock(&pic->lock);
5591 memcpy(&pic->pics[0], &chip->chip.pic,
5592 sizeof(struct kvm_pic_state));
5593 spin_unlock(&pic->lock);
5594 break;
5595 case KVM_IRQCHIP_PIC_SLAVE:
5596 spin_lock(&pic->lock);
5597 memcpy(&pic->pics[1], &chip->chip.pic,
5598 sizeof(struct kvm_pic_state));
5599 spin_unlock(&pic->lock);
5600 break;
5601 case KVM_IRQCHIP_IOAPIC:
5602 kvm_set_ioapic(kvm, &chip->chip.ioapic);
5603 break;
5604 default:
5605 r = -EINVAL;
5606 break;
5607 }
5608 kvm_pic_update_irq(pic);
5609 return r;
5610}
5611
5612static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5613{
5614 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
5615
5616 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
5617
5618 mutex_lock(&kps->lock);
5619 memcpy(ps, &kps->channels, sizeof(*ps));
5620 mutex_unlock(&kps->lock);
5621 return 0;
5622}
5623
5624static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5625{
5626 int i;
5627 struct kvm_pit *pit = kvm->arch.vpit;
5628
5629 mutex_lock(&pit->pit_state.lock);
5630 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
5631 for (i = 0; i < 3; i++)
5632 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
5633 mutex_unlock(&pit->pit_state.lock);
5634 return 0;
5635}
5636
5637static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5638{
5639 mutex_lock(&kvm->arch.vpit->pit_state.lock);
5640 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
5641 sizeof(ps->channels));
5642 ps->flags = kvm->arch.vpit->pit_state.flags;
5643 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
5644 memset(&ps->reserved, 0, sizeof(ps->reserved));
5645 return 0;
5646}
5647
5648static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5649{
5650 int start = 0;
5651 int i;
5652 u32 prev_legacy, cur_legacy;
5653 struct kvm_pit *pit = kvm->arch.vpit;
5654
5655 mutex_lock(&pit->pit_state.lock);
5656 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
5657 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
5658 if (!prev_legacy && cur_legacy)
5659 start = 1;
5660 memcpy(&pit->pit_state.channels, &ps->channels,
5661 sizeof(pit->pit_state.channels));
5662 pit->pit_state.flags = ps->flags;
5663 for (i = 0; i < 3; i++)
5664 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
5665 start && i == 0);
5666 mutex_unlock(&pit->pit_state.lock);
5667 return 0;
5668}
5669
5670static int kvm_vm_ioctl_reinject(struct kvm *kvm,
5671 struct kvm_reinject_control *control)
5672{
5673 struct kvm_pit *pit = kvm->arch.vpit;
5674
5675
5676
5677
5678
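/*
 * Take pit_state.lock so concurrent KVM_REINJECT_CONTROL calls cannot
 * leave the PIT's reinject state inconsistent.
 */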
5679 mutex_lock(&pit->pit_state.lock);
5680 kvm_pit_set_reinject(pit, control->pit_reinject);
5681 mutex_unlock(&pit->pit_state.lock);
5682
5683 return 0;
5684}
5685
5686void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
5687{
5688
5689
5690
5691
5692
5693
5694
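/*
 * Flush all vCPUs' dirty log buffers to the dirty_bitmap before it is
 * reported to userspace.  KVM flushes the buffers on every VM-Exit, so
 * kicking each running vCPU to force a VM-Exit is sufficient.
 */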
5695 struct kvm_vcpu *vcpu;
5696 int i;
5697
5698 kvm_for_each_vcpu(i, vcpu, kvm)
5699 kvm_vcpu_kick(vcpu);
5700}
5701
5702int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
5703 bool line_status)
5704{
5705 if (!irqchip_in_kernel(kvm))
5706 return -ENXIO;
5707
5708 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
5709 irq_event->irq, irq_event->level,
5710 line_status);
5711 return 0;
5712}
5713
5714int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
5715 struct kvm_enable_cap *cap)
5716{
5717 int r;
5718
5719 if (cap->flags)
5720 return -EINVAL;
5721
5722 switch (cap->cap) {
5723 case KVM_CAP_DISABLE_QUIRKS:
5724 kvm->arch.disabled_quirks = cap->args[0];
5725 r = 0;
5726 break;
5727 case KVM_CAP_SPLIT_IRQCHIP: {
5728 mutex_lock(&kvm->lock);
5729 r = -EINVAL;
5730 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
5731 goto split_irqchip_unlock;
5732 r = -EEXIST;
5733 if (irqchip_in_kernel(kvm))
5734 goto split_irqchip_unlock;
5735 if (kvm->created_vcpus)
5736 goto split_irqchip_unlock;
5737 r = kvm_setup_empty_irq_routing(kvm);
5738 if (r)
5739 goto split_irqchip_unlock;
5740
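/* Write the irq routing before irqchip_mode makes the split irqchip visible. */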
5741 smp_wmb();
5742 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
5743 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
5744 kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
5745 r = 0;
5746split_irqchip_unlock:
5747 mutex_unlock(&kvm->lock);
5748 break;
5749 }
5750 case KVM_CAP_X2APIC_API:
5751 r = -EINVAL;
5752 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
5753 break;
5754
5755 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
5756 kvm->arch.x2apic_format = true;
5757 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
5758 kvm->arch.x2apic_broadcast_quirk_disabled = true;
5759
5760 r = 0;
5761 break;
5762 case KVM_CAP_X86_DISABLE_EXITS:
5763 r = -EINVAL;
5764 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
5765 break;
5766
5767 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
5768 kvm_can_mwait_in_guest())
5769 kvm->arch.mwait_in_guest = true;
5770 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
5771 kvm->arch.hlt_in_guest = true;
5772 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
5773 kvm->arch.pause_in_guest = true;
5774 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
5775 kvm->arch.cstate_in_guest = true;
5776 r = 0;
5777 break;
5778 case KVM_CAP_MSR_PLATFORM_INFO:
5779 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
5780 r = 0;
5781 break;
5782 case KVM_CAP_EXCEPTION_PAYLOAD:
5783 kvm->arch.exception_payload_enabled = cap->args[0];
5784 r = 0;
5785 break;
5786 case KVM_CAP_X86_USER_SPACE_MSR:
5787 kvm->arch.user_space_msr_mask = cap->args[0];
5788 r = 0;
5789 break;
5790 case KVM_CAP_X86_BUS_LOCK_EXIT:
5791 r = -EINVAL;
5792 if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE)
5793 break;
5794
5795 if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) &&
5796 (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
5797 break;
5798
5799 if (kvm_has_bus_lock_exit &&
5800 cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
5801 kvm->arch.bus_lock_detection_enabled = true;
5802 r = 0;
5803 break;
5804#ifdef CONFIG_X86_SGX_KVM
5805 case KVM_CAP_SGX_ATTRIBUTE: {
5806 unsigned long allowed_attributes = 0;
5807
5808 r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
5809 if (r)
5810 break;
5811
5812
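/* KVM only supports the PROVISIONKEY privileged SGX attribute. */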
5813 if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
5814 !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
5815 kvm->arch.sgx_provisioning_allowed = true;
5816 else
5817 r = -EINVAL;
5818 break;
5819 }
5820#endif
5821 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
5822 r = -EINVAL;
5823 if (kvm_x86_ops.vm_copy_enc_context_from)
5824 r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
5825 return r;
5826 case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
5827 r = -EINVAL;
5828 if (kvm_x86_ops.vm_move_enc_context_from)
5829 r = kvm_x86_ops.vm_move_enc_context_from(
5830 kvm, cap->args[0]);
5831 return r;
5832 case KVM_CAP_EXIT_HYPERCALL:
5833 if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
5834 r = -EINVAL;
5835 break;
5836 }
5837 kvm->arch.hypercall_exit_enabled = cap->args[0];
5838 r = 0;
5839 break;
5840 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
5841 r = -EINVAL;
5842 if (cap->args[0] & ~1)
5843 break;
5844 kvm->arch.exit_on_emulation_error = cap->args[0];
5845 r = 0;
5846 break;
5847 default:
5848 r = -EINVAL;
5849 break;
5850 }
5851 return r;
5852}
5853
5854static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
5855{
5856 struct kvm_x86_msr_filter *msr_filter;
5857
5858 msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
5859 if (!msr_filter)
5860 return NULL;
5861
5862 msr_filter->default_allow = default_allow;
5863 return msr_filter;
5864}
5865
5866static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
5867{
5868 u32 i;
5869
5870 if (!msr_filter)
5871 return;
5872
5873 for (i = 0; i < msr_filter->count; i++)
5874 kfree(msr_filter->ranges[i].bitmap);
5875
5876 kfree(msr_filter);
5877}
5878
5879static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
5880 struct kvm_msr_filter_range *user_range)
5881{
5882 unsigned long *bitmap = NULL;
5883 size_t bitmap_size;
5884
5885 if (!user_range->nmsrs)
5886 return 0;
5887
5888 if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
5889 return -EINVAL;
5890
5891 if (!user_range->flags)
5892 return -EINVAL;
5893
5894 bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
5895 if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
5896 return -EINVAL;
5897
5898 bitmap = memdup_user((__user u8*)user_range->bitmap, bitmap_size);
5899 if (IS_ERR(bitmap))
5900 return PTR_ERR(bitmap);
5901
5902 msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
5903 .flags = user_range->flags,
5904 .base = user_range->base,
5905 .nmsrs = user_range->nmsrs,
5906 .bitmap = bitmap,
5907 };
5908
5909 msr_filter->count++;
5910 return 0;
5911}
5912
5913static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
5914{
5915 struct kvm_msr_filter __user *user_msr_filter = argp;
5916 struct kvm_x86_msr_filter *new_filter, *old_filter;
5917 struct kvm_msr_filter filter;
5918 bool default_allow;
5919 bool empty = true;
5920 int r = 0;
5921 u32 i;
5922
5923 if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
5924 return -EFAULT;
5925
5926 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
5927 empty &= !filter.ranges[i].nmsrs;
5928
5929 default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
5930 if (empty && !default_allow)
5931 return -EINVAL;
5932
5933 new_filter = kvm_alloc_msr_filter(default_allow);
5934 if (!new_filter)
5935 return -ENOMEM;
5936
5937 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
5938 r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
5939 if (r) {
5940 kvm_free_msr_filter(new_filter);
5941 return r;
5942 }
5943 }
5944
5945 mutex_lock(&kvm->lock);
5946
5947
5948 old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
5949
5950 rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
5951 synchronize_srcu(&kvm->srcu);
5952
5953 kvm_free_msr_filter(old_filter);
5954
5955 kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
5956 mutex_unlock(&kvm->lock);
5957
5958 return 0;
5959}
5960
5961#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
5962static int kvm_arch_suspend_notifier(struct kvm *kvm)
5963{
5964 struct kvm_vcpu *vcpu;
5965 int i, ret = 0;
5966
5967 mutex_lock(&kvm->lock);
5968 kvm_for_each_vcpu(i, vcpu, kvm) {
5969 if (!vcpu->arch.pv_time_enabled)
5970 continue;
5971
5972 ret = kvm_set_guest_paused(vcpu);
5973 if (ret) {
5974 kvm_err("Failed to pause guest VCPU%d: %d\n",
5975 vcpu->vcpu_id, ret);
5976 break;
5977 }
5978 }
5979 mutex_unlock(&kvm->lock);
5980
5981 return ret ? NOTIFY_BAD : NOTIFY_DONE;
5982}
5983
5984int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
5985{
5986 switch (state) {
5987 case PM_HIBERNATION_PREPARE:
5988 case PM_SUSPEND_PREPARE:
5989 return kvm_arch_suspend_notifier(kvm);
5990 }
5991
5992 return NOTIFY_DONE;
5993}
5994#endif
5995
5996static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp)
5997{
5998 struct kvm_clock_data data = { 0 };
5999
6000 get_kvmclock(kvm, &data);
6001 if (copy_to_user(argp, &data, sizeof(data)))
6002 return -EFAULT;
6003
6004 return 0;
6005}
6006
6007static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
6008{
6009 struct kvm_arch *ka = &kvm->arch;
6010 struct kvm_clock_data data;
6011 u64 now_raw_ns;
6012
6013 if (copy_from_user(&data, argp, sizeof(data)))
6014 return -EFAULT;
6015
6016
6017
6018
6019
6020 if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
6021 return -EINVAL;
6022
6023 kvm_hv_invalidate_tsc_page(kvm);
6024 kvm_start_pvclock_update(kvm);
6025 pvclock_update_vm_gtod_copy(kvm);
6026
6027
6028
6029
6030
6031
6032
6033
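/*
 * If userspace supplied a host realtime reference, advance the clock by
 * the real time that has elapsed since that reference was taken, taking
 * care never to step the clock backwards.
 */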
6034 if (data.flags & KVM_CLOCK_REALTIME) {
6035 u64 now_real_ns = ktime_get_real_ns();
6036
6037
6038
6039
6040 if (now_real_ns > data.realtime)
6041 data.clock += now_real_ns - data.realtime;
6042 }
6043
6044 if (ka->use_master_clock)
6045 now_raw_ns = ka->master_kernel_ns;
6046 else
6047 now_raw_ns = get_kvmclock_base_ns();
6048 ka->kvmclock_offset = data.clock - now_raw_ns;
6049 kvm_end_pvclock_update(kvm);
6050 return 0;
6051}
6052
6053long kvm_arch_vm_ioctl(struct file *filp,
6054 unsigned int ioctl, unsigned long arg)
6055{
6056 struct kvm *kvm = filp->private_data;
6057 void __user *argp = (void __user *)arg;
6058 int r = -ENOTTY;
6059
6060
6061
6062
6063
6064 union {
6065 struct kvm_pit_state ps;
6066 struct kvm_pit_state2 ps2;
6067 struct kvm_pit_config pit_config;
6068 } u;
6069
6070 switch (ioctl) {
6071 case KVM_SET_TSS_ADDR:
6072 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
6073 break;
6074 case KVM_SET_IDENTITY_MAP_ADDR: {
6075 u64 ident_addr;
6076
6077 mutex_lock(&kvm->lock);
6078 r = -EINVAL;
6079 if (kvm->created_vcpus)
6080 goto set_identity_unlock;
6081 r = -EFAULT;
6082 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
6083 goto set_identity_unlock;
6084 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
6085set_identity_unlock:
6086 mutex_unlock(&kvm->lock);
6087 break;
6088 }
6089 case KVM_SET_NR_MMU_PAGES:
6090 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
6091 break;
6092 case KVM_GET_NR_MMU_PAGES:
6093 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
6094 break;
6095 case KVM_CREATE_IRQCHIP: {
6096 mutex_lock(&kvm->lock);
6097
6098 r = -EEXIST;
6099 if (irqchip_in_kernel(kvm))
6100 goto create_irqchip_unlock;
6101
6102 r = -EINVAL;
6103 if (kvm->created_vcpus)
6104 goto create_irqchip_unlock;
6105
6106 r = kvm_pic_init(kvm);
6107 if (r)
6108 goto create_irqchip_unlock;
6109
6110 r = kvm_ioapic_init(kvm);
6111 if (r) {
6112 kvm_pic_destroy(kvm);
6113 goto create_irqchip_unlock;
6114 }
6115
6116 r = kvm_setup_default_irq_routing(kvm);
6117 if (r) {
6118 kvm_ioapic_destroy(kvm);
6119 kvm_pic_destroy(kvm);
6120 goto create_irqchip_unlock;
6121 }
6122
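/* Write the irq routing before irqchip_mode makes the irqchip visible. */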
6123 smp_wmb();
6124 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
6125 kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
6126 create_irqchip_unlock:
6127 mutex_unlock(&kvm->lock);
6128 break;
6129 }
6130 case KVM_CREATE_PIT:
6131 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
6132 goto create_pit;
6133 case KVM_CREATE_PIT2:
6134 r = -EFAULT;
6135 if (copy_from_user(&u.pit_config, argp,
6136 sizeof(struct kvm_pit_config)))
6137 goto out;
6138 create_pit:
6139 mutex_lock(&kvm->lock);
6140 r = -EEXIST;
6141 if (kvm->arch.vpit)
6142 goto create_pit_unlock;
6143 r = -ENOMEM;
6144 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
6145 if (kvm->arch.vpit)
6146 r = 0;
6147 create_pit_unlock:
6148 mutex_unlock(&kvm->lock);
6149 break;
6150 case KVM_GET_IRQCHIP: {
6151
6152 struct kvm_irqchip *chip;
6153
6154 chip = memdup_user(argp, sizeof(*chip));
6155 if (IS_ERR(chip)) {
6156 r = PTR_ERR(chip);
6157 goto out;
6158 }
6159
6160 r = -ENXIO;
6161 if (!irqchip_kernel(kvm))
6162 goto get_irqchip_out;
6163 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
6164 if (r)
6165 goto get_irqchip_out;
6166 r = -EFAULT;
6167 if (copy_to_user(argp, chip, sizeof(*chip)))
6168 goto get_irqchip_out;
6169 r = 0;
6170 get_irqchip_out:
6171 kfree(chip);
6172 break;
6173 }
6174 case KVM_SET_IRQCHIP: {
6175
6176 struct kvm_irqchip *chip;
6177
6178 chip = memdup_user(argp, sizeof(*chip));
6179 if (IS_ERR(chip)) {
6180 r = PTR_ERR(chip);
6181 goto out;
6182 }
6183
6184 r = -ENXIO;
6185 if (!irqchip_kernel(kvm))
6186 goto set_irqchip_out;
6187 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
6188 set_irqchip_out:
6189 kfree(chip);
6190 break;
6191 }
6192 case KVM_GET_PIT: {
6193 r = -EFAULT;
6194 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
6195 goto out;
6196 r = -ENXIO;
6197 if (!kvm->arch.vpit)
6198 goto out;
6199 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
6200 if (r)
6201 goto out;
6202 r = -EFAULT;
6203 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
6204 goto out;
6205 r = 0;
6206 break;
6207 }
6208 case KVM_SET_PIT: {
6209 r = -EFAULT;
6210 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
6211 goto out;
6212 mutex_lock(&kvm->lock);
6213 r = -ENXIO;
6214 if (!kvm->arch.vpit)
6215 goto set_pit_out;
6216 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
6217set_pit_out:
6218 mutex_unlock(&kvm->lock);
6219 break;
6220 }
6221 case KVM_GET_PIT2: {
6222 r = -ENXIO;
6223 if (!kvm->arch.vpit)
6224 goto out;
6225 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
6226 if (r)
6227 goto out;
6228 r = -EFAULT;
6229 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
6230 goto out;
6231 r = 0;
6232 break;
6233 }
6234 case KVM_SET_PIT2: {
6235 r = -EFAULT;
6236 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
6237 goto out;
6238 mutex_lock(&kvm->lock);
6239 r = -ENXIO;
6240 if (!kvm->arch.vpit)
6241 goto set_pit2_out;
6242 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
6243set_pit2_out:
6244 mutex_unlock(&kvm->lock);
6245 break;
6246 }
6247 case KVM_REINJECT_CONTROL: {
6248 struct kvm_reinject_control control;
6249 r = -EFAULT;
6250 if (copy_from_user(&control, argp, sizeof(control)))
6251 goto out;
6252 r = -ENXIO;
6253 if (!kvm->arch.vpit)
6254 goto out;
6255 r = kvm_vm_ioctl_reinject(kvm, &control);
6256 break;
6257 }
6258 case KVM_SET_BOOT_CPU_ID:
6259 r = 0;
6260 mutex_lock(&kvm->lock);
6261 if (kvm->created_vcpus)
6262 r = -EBUSY;
6263 else
6264 kvm->arch.bsp_vcpu_id = arg;
6265 mutex_unlock(&kvm->lock);
6266 break;
6267#ifdef CONFIG_KVM_XEN
6268 case KVM_XEN_HVM_CONFIG: {
6269 struct kvm_xen_hvm_config xhc;
6270 r = -EFAULT;
6271 if (copy_from_user(&xhc, argp, sizeof(xhc)))
6272 goto out;
6273 r = kvm_xen_hvm_config(kvm, &xhc);
6274 break;
6275 }
6276 case KVM_XEN_HVM_GET_ATTR: {
6277 struct kvm_xen_hvm_attr xha;
6278
6279 r = -EFAULT;
6280 if (copy_from_user(&xha, argp, sizeof(xha)))
6281 goto out;
6282 r = kvm_xen_hvm_get_attr(kvm, &xha);
6283 if (!r && copy_to_user(argp, &xha, sizeof(xha)))
6284 r = -EFAULT;
6285 break;
6286 }
6287 case KVM_XEN_HVM_SET_ATTR: {
6288 struct kvm_xen_hvm_attr xha;
6289
6290 r = -EFAULT;
6291 if (copy_from_user(&xha, argp, sizeof(xha)))
6292 goto out;
6293 r = kvm_xen_hvm_set_attr(kvm, &xha);
6294 break;
6295 }
6296#endif
6297 case KVM_SET_CLOCK:
6298 r = kvm_vm_ioctl_set_clock(kvm, argp);
6299 break;
6300 case KVM_GET_CLOCK:
6301 r = kvm_vm_ioctl_get_clock(kvm, argp);
6302 break;
6303 case KVM_MEMORY_ENCRYPT_OP: {
6304 r = -ENOTTY;
6305 if (kvm_x86_ops.mem_enc_op)
6306 r = static_call(kvm_x86_mem_enc_op)(kvm, argp);
6307 break;
6308 }
6309 case KVM_MEMORY_ENCRYPT_REG_REGION: {
6310 struct kvm_enc_region region;
6311
6312 r = -EFAULT;
6313 if (copy_from_user(&region, argp, sizeof(region)))
6314 goto out;
6315
6316 r = -ENOTTY;
6317 if (kvm_x86_ops.mem_enc_reg_region)
6318 r = static_call(kvm_x86_mem_enc_reg_region)(kvm, &region);
6319 break;
6320 }
6321 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
6322 struct kvm_enc_region region;
6323
6324 r = -EFAULT;
6325 if (copy_from_user(&region, argp, sizeof(region)))
6326 goto out;
6327
6328 r = -ENOTTY;
6329 if (kvm_x86_ops.mem_enc_unreg_region)
6330 r = static_call(kvm_x86_mem_enc_unreg_region)(kvm, &region);
6331 break;
6332 }
6333 case KVM_HYPERV_EVENTFD: {
6334 struct kvm_hyperv_eventfd hvevfd;
6335
6336 r = -EFAULT;
6337 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
6338 goto out;
6339 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
6340 break;
6341 }
6342 case KVM_SET_PMU_EVENT_FILTER:
6343 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
6344 break;
6345 case KVM_X86_SET_MSR_FILTER:
6346 r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
6347 break;
6348 default:
6349 r = -ENOTTY;
6350 }
6351out:
6352 return r;
6353}
6354
6355static void kvm_init_msr_list(void)
6356{
6357 struct x86_pmu_capability x86_pmu;
6358 u32 dummy[2];
6359 unsigned i;
6360
6361 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
6362 "Please update the fixed PMCs in msrs_to_saved_all[]");
6363
6364 perf_get_x86_pmu_capability(&x86_pmu);
6365
6366 num_msrs_to_save = 0;
6367 num_emulated_msrs = 0;
6368 num_msr_based_features = 0;
6369
6370 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
6371 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
6372 continue;
6373
6374
6375
6376
6377
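/*
 * Even MSRs that can be read on the host may need to be hidden from the
 * guest when the corresponding feature is not exposed.
 */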
6378 switch (msrs_to_save_all[i]) {
6379 case MSR_IA32_BNDCFGS:
6380 if (!kvm_mpx_supported())
6381 continue;
6382 break;
6383 case MSR_TSC_AUX:
6384 if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
6385 !kvm_cpu_cap_has(X86_FEATURE_RDPID))
6386 continue;
6387 break;
6388 case MSR_IA32_UMWAIT_CONTROL:
6389 if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
6390 continue;
6391 break;
6392 case MSR_IA32_RTIT_CTL:
6393 case MSR_IA32_RTIT_STATUS:
6394 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
6395 continue;
6396 break;
6397 case MSR_IA32_RTIT_CR3_MATCH:
6398 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6399 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
6400 continue;
6401 break;
6402 case MSR_IA32_RTIT_OUTPUT_BASE:
6403 case MSR_IA32_RTIT_OUTPUT_MASK:
6404 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6405 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
6406 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
6407 continue;
6408 break;
6409 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
6410 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6411 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
6412 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
6413 continue;
6414 break;
6415 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
6416 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
6417 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
6418 continue;
6419 break;
6420 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
6421 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
6422 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
6423 continue;
6424 break;
6425 default:
6426 break;
6427 }
6428
6429 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
6430 }
6431
6432 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
6433 if (!static_call(kvm_x86_has_emulated_msr)(NULL, emulated_msrs_all[i]))
6434 continue;
6435
6436 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
6437 }
6438
6439 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
6440 struct kvm_msr_entry msr;
6441
6442 msr.index = msr_based_features_all[i];
6443 if (kvm_get_msr_feature(&msr))
6444 continue;
6445
6446 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
6447 }
6448}
6449
6450static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
6451 const void *v)
6452{
6453 int handled = 0;
6454 int n;
6455
6456 do {
6457 n = min(len, 8);
6458 if (!(lapic_in_kernel(vcpu) &&
6459 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
6460 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
6461 break;
6462 handled += n;
6463 addr += n;
6464 len -= n;
6465 v += n;
6466 } while (len);
6467
6468 return handled;
6469}
6470
6471static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
6472{
6473 int handled = 0;
6474 int n;
6475
6476 do {
6477 n = min(len, 8);
6478 if (!(lapic_in_kernel(vcpu) &&
6479 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
6480 addr, n, v))
6481 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
6482 break;
6483 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
6484 handled += n;
6485 addr += n;
6486 len -= n;
6487 v += n;
6488 } while (len);
6489
6490 return handled;
6491}
6492
6493static void kvm_set_segment(struct kvm_vcpu *vcpu,
6494 struct kvm_segment *var, int seg)
6495{
6496 static_call(kvm_x86_set_segment)(vcpu, var, seg);
6497}
6498
6499void kvm_get_segment(struct kvm_vcpu *vcpu,
6500 struct kvm_segment *var, int seg)
6501{
6502 static_call(kvm_x86_get_segment)(vcpu, var, seg);
6503}
6504
6505gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
6506 struct x86_exception *exception)
6507{
6508 gpa_t t_gpa;
6509
6510 BUG_ON(!mmu_is_nested(vcpu));
6511
6512
6513 access |= PFERR_USER_MASK;
6514 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
6515
6516 return t_gpa;
6517}
6518
6519gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
6520 struct x86_exception *exception)
6521{
6522 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6523 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6524}
6525EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
6526
6527 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
6528 struct x86_exception *exception)
6529{
6530 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6531 access |= PFERR_FETCH_MASK;
6532 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6533}
6534
6535gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
6536 struct x86_exception *exception)
6537{
6538 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6539 access |= PFERR_WRITE_MASK;
6540 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6541}
6542EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
6543
6544
6545gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
6546 struct x86_exception *exception)
6547{
6548 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
6549}
6550
6551static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
6552 struct kvm_vcpu *vcpu, u32 access,
6553 struct x86_exception *exception)
6554{
6555 void *data = val;
6556 int r = X86EMUL_CONTINUE;
6557
6558 while (bytes) {
6559 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
6560 exception);
6561 unsigned offset = addr & (PAGE_SIZE-1);
6562 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
6563 int ret;
6564
6565 if (gpa == UNMAPPED_GVA)
6566 return X86EMUL_PROPAGATE_FAULT;
6567 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
6568 offset, toread);
6569 if (ret < 0) {
6570 r = X86EMUL_IO_NEEDED;
6571 goto out;
6572 }
6573
6574 bytes -= toread;
6575 data += toread;
6576 addr += toread;
6577 }
6578out:
6579 return r;
6580}
6581
6582
6583static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
6584 gva_t addr, void *val, unsigned int bytes,
6585 struct x86_exception *exception)
6586{
6587 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6588 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6589 unsigned offset;
6590 int ret;
6591
6592
6593 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
6594 exception);
6595 if (unlikely(gpa == UNMAPPED_GVA))
6596 return X86EMUL_PROPAGATE_FAULT;
6597
6598 offset = addr & (PAGE_SIZE-1);
6599 if (WARN_ON(offset + bytes > PAGE_SIZE))
6600 bytes = (unsigned)PAGE_SIZE - offset;
6601 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
6602 offset, bytes);
6603 if (unlikely(ret < 0))
6604 return X86EMUL_IO_NEEDED;
6605
6606 return X86EMUL_CONTINUE;
6607}
6608
6609int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
6610 gva_t addr, void *val, unsigned int bytes,
6611 struct x86_exception *exception)
6612{
6613 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6614
6615
6616
6617
6618
6619
6620
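/*
 * Clear the exception structure up front so that callers which blindly
 * inject a page fault on failure do not leak stack contents into the
 * guest.
 */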
6621 memset(exception, 0, sizeof(*exception));
6622 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
6623 exception);
6624}
6625EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
6626
6627static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
6628 gva_t addr, void *val, unsigned int bytes,
6629 struct x86_exception *exception, bool system)
6630{
6631 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6632 u32 access = 0;
6633
6634 if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
6635 access |= PFERR_USER_MASK;
6636
6637 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
6638}
6639
6640static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
6641 unsigned long addr, void *val, unsigned int bytes)
6642{
6643 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6644 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
6645
6646 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
6647}
6648
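/*
 * Mirror of kvm_read_guest_virt_helper() for writes: translate and copy
 * 'val' to guest-virtual memory one page at a time.
 */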
6649static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
6650 struct kvm_vcpu *vcpu, u32 access,
6651 struct x86_exception *exception)
6652{
6653 void *data = val;
6654 int r = X86EMUL_CONTINUE;
6655
6656 while (bytes) {
6657 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
6658 access,
6659 exception);
6660 unsigned offset = addr & (PAGE_SIZE-1);
6661 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
6662 int ret;
6663
6664 if (gpa == UNMAPPED_GVA)
6665 return X86EMUL_PROPAGATE_FAULT;
6666 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
6667 if (ret < 0) {
6668 r = X86EMUL_IO_NEEDED;
6669 goto out;
6670 }
6671
6672 bytes -= towrite;
6673 data += towrite;
6674 addr += towrite;
6675 }
6676out:
6677 return r;
6678}
6679
6680static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
6681 unsigned int bytes, struct x86_exception *exception,
6682 bool system)
6683{
6684 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6685 u32 access = PFERR_WRITE_MASK;
6686
6687 if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
6688 access |= PFERR_USER_MASK;
6689
6690 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6691 access, exception);
6692}
6693
6694int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
6695 unsigned int bytes, struct x86_exception *exception)
6696{
	/* This can pull in lots of guest pages; flush L1D before re-entry (L1TF). */
6698 vcpu->arch.l1tf_flush_l1d = true;
6699
6700 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6701 PFERR_WRITE_MASK, exception);
6702}
6703EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
6704
6705int handle_ud(struct kvm_vcpu *vcpu)
6706{
6707 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
6708 int emul_type = EMULTYPE_TRAP_UD;
6709 char sig[5];
6710 struct x86_exception e;
6711
6712 if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
6713 return 1;
6714
6715 if (force_emulation_prefix &&
6716 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
6717 sig, sizeof(sig), &e) == 0 &&
6718 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
6719 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
6720 emul_type = EMULTYPE_TRAP_UD_FORCED;
6721 }
6722
6723 return kvm_emulate_instruction(vcpu, emul_type);
6724}
6725EXPORT_SYMBOL_GPL(handle_ud);
6726
6727static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6728 gpa_t gpa, bool write)
6729{
	/* For APIC access vmexit */
6731 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6732 return 1;
6733
6734 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
6735 trace_vcpu_match_mmio(gva, gpa, write, true);
6736 return 1;
6737 }
6738
6739 return 0;
6740}
6741
6742static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6743 gpa_t *gpa, struct x86_exception *exception,
6744 bool write)
6745{
6746 u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
6747 | (write ? PFERR_WRITE_MASK : 0);
6748
	/*
	 * Check the cached MMIO GVA translation first; it is only usable if
	 * paging is off or the cached access bits still pass the permission
	 * check for this access.
	 */
6754 if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
6755 !permission_fault(vcpu, vcpu->arch.walk_mmu,
6756 vcpu->arch.mmio_access, 0, access))) {
6757 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
6758 (gva & (PAGE_SIZE - 1));
6759 trace_vcpu_match_mmio(gva, *gpa, write, false);
6760 return 1;
6761 }
6762
6763 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6764
6765 if (*gpa == UNMAPPED_GVA)
6766 return -1;
6767
6768 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
6769}
6770
6771int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
6772 const void *val, int bytes)
6773{
6774 int ret;
6775
6776 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
6777 if (ret < 0)
6778 return 0;
6779 kvm_page_track_write(vcpu, gpa, val, bytes);
6780 return 1;
6781}
6782
6783struct read_write_emulator_ops {
6784 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
6785 int bytes);
6786 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
6787 void *val, int bytes);
6788 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6789 int bytes, void *val);
6790 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6791 void *val, int bytes);
6792 bool write;
6793};
6794
6795static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
6796{
6797 if (vcpu->mmio_read_completed) {
6798 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
6799 vcpu->mmio_fragments[0].gpa, val);
6800 vcpu->mmio_read_completed = 0;
6801 return 1;
6802 }
6803
6804 return 0;
6805}
6806
6807static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
6808 void *val, int bytes)
6809{
6810 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
6811}
6812
6813static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
6814 void *val, int bytes)
6815{
6816 return emulator_write_phys(vcpu, gpa, val, bytes);
6817}
6818
6819static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
6820{
6821 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
6822 return vcpu_mmio_write(vcpu, gpa, bytes, val);
6823}
6824
6825static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
6826 void *val, int bytes)
6827{
6828 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
6829 return X86EMUL_IO_NEEDED;
6830}
6831
6832static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
6833 void *val, int bytes)
6834{
6835 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
6836
6837 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
6838 return X86EMUL_CONTINUE;
6839}
6840
6841static const struct read_write_emulator_ops read_emultor = {
6842 .read_write_prepare = read_prepare,
6843 .read_write_emulate = read_emulate,
6844 .read_write_mmio = vcpu_mmio_read,
6845 .read_write_exit_mmio = read_exit_mmio,
6846};
6847
6848static const struct read_write_emulator_ops write_emultor = {
6849 .read_write_emulate = write_emulate,
6850 .read_write_mmio = write_mmio,
6851 .read_write_exit_mmio = write_exit_mmio,
6852 .write = true,
6853};
6854
6855static int emulator_read_write_onepage(unsigned long addr, void *val,
6856 unsigned int bytes,
6857 struct x86_exception *exception,
6858 struct kvm_vcpu *vcpu,
6859 const struct read_write_emulator_ops *ops)
6860{
6861 gpa_t gpa;
6862 int handled, ret;
6863 bool write = ops->write;
6864 struct kvm_mmio_fragment *frag;
6865 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6866
	/*
	 * If the exit was due to a NPF we may already have a GPA.
	 * If the GPA is present, use it to avoid the GVA to GPA table walk.
	 * Note, this cannot be used on string operations since a string
	 * operation using rep will only have the initial GPA from when the
	 * NPF occurred.
	 */
6874 if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
6875 (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
6876 gpa = ctxt->gpa_val;
6877 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
6878 } else {
6879 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
6880 if (ret < 0)
6881 return X86EMUL_PROPAGATE_FAULT;
6882 }
6883
6884 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
6885 return X86EMUL_CONTINUE;
6886
	/*
	 * Is this MMIO handled locally?
	 */
6890 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
6891 if (handled == bytes)
6892 return X86EMUL_CONTINUE;
6893
6894 gpa += handled;
6895 bytes -= handled;
6896 val += handled;
6897
6898 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
6899 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
6900 frag->gpa = gpa;
6901 frag->data = val;
6902 frag->len = bytes;
6903 return X86EMUL_CONTINUE;
6904}
6905
6906static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
6907 unsigned long addr,
6908 void *val, unsigned int bytes,
6909 struct x86_exception *exception,
6910 const struct read_write_emulator_ops *ops)
6911{
6912 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6913 gpa_t gpa;
6914 int rc;
6915
6916 if (ops->read_write_prepare &&
6917 ops->read_write_prepare(vcpu, val, bytes))
6918 return X86EMUL_CONTINUE;
6919
6920 vcpu->mmio_nr_fragments = 0;
6921
	/* Crossing a page boundary? */
6923 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
6924 int now;
6925
6926 now = -addr & ~PAGE_MASK;
6927 rc = emulator_read_write_onepage(addr, val, now, exception,
6928 vcpu, ops);
6929
6930 if (rc != X86EMUL_CONTINUE)
6931 return rc;
6932 addr += now;
6933 if (ctxt->mode != X86EMUL_MODE_PROT64)
6934 addr = (u32)addr;
6935 val += now;
6936 bytes -= now;
6937 }
6938
6939 rc = emulator_read_write_onepage(addr, val, bytes, exception,
6940 vcpu, ops);
6941 if (rc != X86EMUL_CONTINUE)
6942 return rc;
6943
6944 if (!vcpu->mmio_nr_fragments)
6945 return rc;
6946
6947 gpa = vcpu->mmio_fragments[0].gpa;
6948
6949 vcpu->mmio_needed = 1;
6950 vcpu->mmio_cur_fragment = 0;
6951
6952 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
6953 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
6954 vcpu->run->exit_reason = KVM_EXIT_MMIO;
6955 vcpu->run->mmio.phys_addr = gpa;
6956
6957 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
6958}
6959
6960static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
6961 unsigned long addr,
6962 void *val,
6963 unsigned int bytes,
6964 struct x86_exception *exception)
6965{
6966 return emulator_read_write(ctxt, addr, val, bytes,
6967 exception, &read_emultor);
6968}
6969
6970static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
6971 unsigned long addr,
6972 const void *val,
6973 unsigned int bytes,
6974 struct x86_exception *exception)
6975{
6976 return emulator_read_write(ctxt, addr, (void *)val, bytes,
6977 exception, &write_emultor);
6978}
6979
6980#define CMPXCHG_TYPE(t, ptr, old, new) \
6981 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
6982
6983#ifdef CONFIG_X86_64
6984# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
6985#else
6986# define CMPXCHG64(ptr, old, new) \
6987 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
6988#endif
6989
6990static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
6991 unsigned long addr,
6992 const void *old,
6993 const void *new,
6994 unsigned int bytes,
6995 struct x86_exception *exception)
6996{
6997 struct kvm_host_map map;
6998 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6999 u64 page_line_mask;
7000 gpa_t gpa;
7001 char *kaddr;
7002 bool exchanged;
7003
	/* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */
7005 if (bytes > 8 || (bytes & (bytes - 1)))
7006 goto emul_write;
7007
7008 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
7009
7010 if (gpa == UNMAPPED_GVA ||
7011 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
7012 goto emul_write;
7013
	/*
	 * Emulate the atomic as a straight write to avoid #AC if split lock
	 * detection is enabled in the host and the access splits a cache
	 * line.
	 */
7018 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
7019 page_line_mask = ~(cache_line_size() - 1);
7020 else
7021 page_line_mask = PAGE_MASK;
7022
7023 if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
7024 goto emul_write;
7025
7026 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
7027 goto emul_write;
7028
7029 kaddr = map.hva + offset_in_page(gpa);
7030
7031 switch (bytes) {
7032 case 1:
7033 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
7034 break;
7035 case 2:
7036 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
7037 break;
7038 case 4:
7039 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
7040 break;
7041 case 8:
7042 exchanged = CMPXCHG64(kaddr, old, new);
7043 break;
7044 default:
7045 BUG();
7046 }
7047
7048 kvm_vcpu_unmap(vcpu, &map, true);
7049
7050 if (!exchanged)
7051 return X86EMUL_CMPXCHG_FAILED;
7052
7053 kvm_page_track_write(vcpu, gpa, new, bytes);
7054
7055 return X86EMUL_CONTINUE;
7056
7057emul_write:
7058 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
7059
7060 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
7061}
7062
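/*
 * Try to handle the pending PIO request entirely in the kernel by issuing one
 * KVM_PIO_BUS access per repetition; returns 0 on success, non-zero if some
 * repetition could not be handled in-kernel.
 */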
7063static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
7064{
7065 int r = 0, i;
7066
7067 for (i = 0; i < vcpu->arch.pio.count; i++) {
7068 if (vcpu->arch.pio.in)
7069 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
7070 vcpu->arch.pio.size, pd);
7071 else
7072 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
7073 vcpu->arch.pio.port, vcpu->arch.pio.size,
7074 pd);
7075 if (r)
7076 break;
7077 pd += vcpu->arch.pio.size;
7078 }
7079 return r;
7080}
7081
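/*
 * Common bookkeeping for emulated IN/OUT: record the request in
 * vcpu->arch.pio, attempt in-kernel handling, and otherwise prepare a
 * KVM_EXIT_IO exit.  Returns 1 if the I/O was handled in the kernel, 0 if
 * userspace completion is required.
 */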
7082static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
7083 unsigned short port,
7084 unsigned int count, bool in)
7085{
7086 vcpu->arch.pio.port = port;
7087 vcpu->arch.pio.in = in;
7088 vcpu->arch.pio.count = count;
7089 vcpu->arch.pio.size = size;
7090
7091 if (!kernel_pio(vcpu, vcpu->arch.pio_data))
7092 return 1;
7093
7094 vcpu->run->exit_reason = KVM_EXIT_IO;
7095 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
7096 vcpu->run->io.size = size;
7097 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
7098 vcpu->run->io.count = count;
7099 vcpu->run->io.port = port;
7100
7101 return 0;
7102}
7103
7104static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
7105 unsigned short port, unsigned int count)
7106{
7107 WARN_ON(vcpu->arch.pio.count);
7108 memset(vcpu->arch.pio_data, 0, size * count);
7109 return emulator_pio_in_out(vcpu, size, port, count, true);
7110}
7111
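/* Copy the data of a completed PIO "in" into 'val' and clear the count. */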
7112static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
7113{
7114 int size = vcpu->arch.pio.size;
7115 unsigned count = vcpu->arch.pio.count;
7116 memcpy(val, vcpu->arch.pio_data, size * count);
7117 trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
7118 vcpu->arch.pio.count = 0;
7119}
7120
7121static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
7122 unsigned short port, void *val, unsigned int count)
7123{
7124 if (vcpu->arch.pio.count) {
		/*
		 * Complete a previous iteration that required userspace I/O.
		 * Note, @count isn't guaranteed to match pio.count as userspace
		 * can modify ECX before rerunning the vCPU.  Ignore any such
		 * shenanigans as KVM doesn't support modifying the rep count,
		 * and the emulator ensures @count doesn't overflow the buffer.
		 */
7132 } else {
7133 int r = __emulator_pio_in(vcpu, size, port, count);
7134 if (!r)
7135 return r;
7136
		/* Results already available, fall through.  */
7138 }
7139
7140 complete_emulator_pio_in(vcpu, val);
7141 return 1;
7142}
7143
7144static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
7145 int size, unsigned short port, void *val,
7146 unsigned int count)
7147{
7148 return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
7150}
7151
7152static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
7153 unsigned short port, const void *val,
7154 unsigned int count)
7155{
7156 int ret;
7157
7158 memcpy(vcpu->arch.pio_data, val, size * count);
7159 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
7160 ret = emulator_pio_in_out(vcpu, size, port, count, false);
7161 if (ret)
7162 vcpu->arch.pio.count = 0;
7163
7164 return ret;
7165}
7166
7167static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
7168 int size, unsigned short port,
7169 const void *val, unsigned int count)
7170{
7171 return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
7172}
7173
7174static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
7175{
7176 return static_call(kvm_x86_get_segment_base)(vcpu, seg);
7177}
7178
7179static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
7180{
7181 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
7182}
7183
7184static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
7185{
7186 if (!need_emulate_wbinvd(vcpu))
7187 return X86EMUL_CONTINUE;
7188
7189 if (static_call(kvm_x86_has_wbinvd_exit)()) {
7190 int cpu = get_cpu();
7191
7192 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
7193 on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
7194 wbinvd_ipi, NULL, 1);
7195 put_cpu();
7196 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
7197 } else
7198 wbinvd();
7199 return X86EMUL_CONTINUE;
7200}
7201
7202int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
7203{
7204 kvm_emulate_wbinvd_noskip(vcpu);
7205 return kvm_skip_emulated_instruction(vcpu);
7206}
7207EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
7208
7211static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
7212{
7213 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
7214}
7215
7216static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
7217 unsigned long *dest)
7218{
7219 kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
7220}
7221
7222static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
7223 unsigned long value)
7224{
7226 return kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
7227}
7228
7229static u64 mk_cr_64(u64 curr_cr, u32 new_val)
7230{
7231 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
7232}
7233
7234static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
7235{
7236 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7237 unsigned long value;
7238
7239 switch (cr) {
7240 case 0:
7241 value = kvm_read_cr0(vcpu);
7242 break;
7243 case 2:
7244 value = vcpu->arch.cr2;
7245 break;
7246 case 3:
7247 value = kvm_read_cr3(vcpu);
7248 break;
7249 case 4:
7250 value = kvm_read_cr4(vcpu);
7251 break;
7252 case 8:
7253 value = kvm_get_cr8(vcpu);
7254 break;
7255 default:
7256 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7257 return 0;
7258 }
7259
7260 return value;
7261}
7262
7263static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
7264{
7265 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7266 int res = 0;
7267
7268 switch (cr) {
7269 case 0:
7270 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
7271 break;
7272 case 2:
7273 vcpu->arch.cr2 = val;
7274 break;
7275 case 3:
7276 res = kvm_set_cr3(vcpu, val);
7277 break;
7278 case 4:
7279 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
7280 break;
7281 case 8:
7282 res = kvm_set_cr8(vcpu, val);
7283 break;
7284 default:
7285 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7286 res = -1;
7287 }
7288
7289 return res;
7290}
7291
7292static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
7293{
7294 return static_call(kvm_x86_get_cpl)(emul_to_vcpu(ctxt));
7295}
7296
7297static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7298{
7299 static_call(kvm_x86_get_gdt)(emul_to_vcpu(ctxt), dt);
7300}
7301
7302static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7303{
7304 static_call(kvm_x86_get_idt)(emul_to_vcpu(ctxt), dt);
7305}
7306
7307static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7308{
7309 static_call(kvm_x86_set_gdt)(emul_to_vcpu(ctxt), dt);
7310}
7311
7312static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7313{
7314 static_call(kvm_x86_set_idt)(emul_to_vcpu(ctxt), dt);
7315}
7316
7317static unsigned long emulator_get_cached_segment_base(
7318 struct x86_emulate_ctxt *ctxt, int seg)
7319{
7320 return get_segment_base(emul_to_vcpu(ctxt), seg);
7321}
7322
7323static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
7324 struct desc_struct *desc, u32 *base3,
7325 int seg)
7326{
7327 struct kvm_segment var;
7328
7329 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
7330 *selector = var.selector;
7331
7332 if (var.unusable) {
7333 memset(desc, 0, sizeof(*desc));
7334 if (base3)
7335 *base3 = 0;
7336 return false;
7337 }
7338
7339 if (var.g)
7340 var.limit >>= 12;
7341 set_desc_limit(desc, var.limit);
7342 set_desc_base(desc, (unsigned long)var.base);
7343#ifdef CONFIG_X86_64
7344 if (base3)
7345 *base3 = var.base >> 32;
7346#endif
7347 desc->type = var.type;
7348 desc->s = var.s;
7349 desc->dpl = var.dpl;
7350 desc->p = var.present;
7351 desc->avl = var.avl;
7352 desc->l = var.l;
7353 desc->d = var.db;
7354 desc->g = var.g;
7355
7356 return true;
7357}
7358
7359static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
7360 struct desc_struct *desc, u32 base3,
7361 int seg)
7362{
7363 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7364 struct kvm_segment var;
7365
7366 var.selector = selector;
7367 var.base = get_desc_base(desc);
7368#ifdef CONFIG_X86_64
7369 var.base |= ((u64)base3) << 32;
7370#endif
7371 var.limit = get_desc_limit(desc);
7372 if (desc->g)
7373 var.limit = (var.limit << 12) | 0xfff;
7374 var.type = desc->type;
7375 var.dpl = desc->dpl;
7376 var.db = desc->d;
7377 var.s = desc->s;
7378 var.l = desc->l;
7379 var.g = desc->g;
7380 var.avl = desc->avl;
7381 var.present = desc->p;
7382 var.unusable = !var.present;
7383 var.padding = 0;
7384
7385 kvm_set_segment(vcpu, &var, seg);
7387}
7388
7389static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
7390 u32 msr_index, u64 *pdata)
7391{
7392 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7393 int r;
7394
7395 r = kvm_get_msr(vcpu, msr_index, pdata);
7396
7397 if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
		/* Bounce to user space */
7399 return X86EMUL_IO_NEEDED;
7400 }
7401
7402 return r;
7403}
7404
7405static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
7406 u32 msr_index, u64 data)
7407{
7408 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7409 int r;
7410
7411 r = kvm_set_msr(vcpu, msr_index, data);
7412
7413 if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
		/* Bounce to user space */
7415 return X86EMUL_IO_NEEDED;
7416 }
7417
7418 return r;
7419}
7420
7421static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
7422{
7423 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7424
7425 return vcpu->arch.smbase;
7426}
7427
7428static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
7429{
7430 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7431
7432 vcpu->arch.smbase = smbase;
7433}
7434
7435static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
7436 u32 pmc)
7437{
7438 if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
7439 return 0;
7440 return -EINVAL;
7441}
7442
7443static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
7444 u32 pmc, u64 *pdata)
7445{
7446 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
7447}
7448
7449static void emulator_halt(struct x86_emulate_ctxt *ctxt)
7450{
7451 emul_to_vcpu(ctxt)->arch.halt_request = 1;
7452}
7453
7454static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
7455 struct x86_instruction_info *info,
7456 enum x86_intercept_stage stage)
7457{
7458 return static_call(kvm_x86_check_intercept)(emul_to_vcpu(ctxt), info, stage,
7459 &ctxt->exception);
7460}
7461
7462static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
7463 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
7464 bool exact_only)
7465{
7466 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
7467}
7468
7469static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
7470{
7471 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
7472}
7473
7474static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
7475{
7476 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
7477}
7478
7479static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
7480{
7481 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
7482}
7483
7484static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
7485{
7486 return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
7487}
7488
7489static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
7490{
7491 kvm_register_write_raw(emul_to_vcpu(ctxt), reg, val);
7492}
7493
7494static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
7495{
7496 static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
7497}
7498
7499static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
7500{
7501 return emul_to_vcpu(ctxt)->arch.hflags;
7502}
7503
7504static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
7505{
7506 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7507
7508 kvm_smm_changed(vcpu, false);
7509}
7510
7511static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
7512 const char *smstate)
7513{
7514 return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
7515}
7516
7517static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
7518{
7519 kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
7520}
7521
7522static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
7523{
7524 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
7525}
7526
7527static const struct x86_emulate_ops emulate_ops = {
7528 .read_gpr = emulator_read_gpr,
7529 .write_gpr = emulator_write_gpr,
7530 .read_std = emulator_read_std,
7531 .write_std = emulator_write_std,
7532 .read_phys = kvm_read_guest_phys_system,
7533 .fetch = kvm_fetch_guest_virt,
7534 .read_emulated = emulator_read_emulated,
7535 .write_emulated = emulator_write_emulated,
7536 .cmpxchg_emulated = emulator_cmpxchg_emulated,
7537 .invlpg = emulator_invlpg,
7538 .pio_in_emulated = emulator_pio_in_emulated,
7539 .pio_out_emulated = emulator_pio_out_emulated,
7540 .get_segment = emulator_get_segment,
7541 .set_segment = emulator_set_segment,
7542 .get_cached_segment_base = emulator_get_cached_segment_base,
7543 .get_gdt = emulator_get_gdt,
7544 .get_idt = emulator_get_idt,
7545 .set_gdt = emulator_set_gdt,
7546 .set_idt = emulator_set_idt,
7547 .get_cr = emulator_get_cr,
7548 .set_cr = emulator_set_cr,
7549 .cpl = emulator_get_cpl,
7550 .get_dr = emulator_get_dr,
7551 .set_dr = emulator_set_dr,
7552 .get_smbase = emulator_get_smbase,
7553 .set_smbase = emulator_set_smbase,
7554 .set_msr = emulator_set_msr,
7555 .get_msr = emulator_get_msr,
7556 .check_pmc = emulator_check_pmc,
7557 .read_pmc = emulator_read_pmc,
7558 .halt = emulator_halt,
7559 .wbinvd = emulator_wbinvd,
7560 .fix_hypercall = emulator_fix_hypercall,
7561 .intercept = emulator_intercept,
7562 .get_cpuid = emulator_get_cpuid,
7563 .guest_has_long_mode = emulator_guest_has_long_mode,
7564 .guest_has_movbe = emulator_guest_has_movbe,
7565 .guest_has_fxsr = emulator_guest_has_fxsr,
7566 .set_nmi_mask = emulator_set_nmi_mask,
7567 .get_hflags = emulator_get_hflags,
7568 .exiting_smm = emulator_exiting_smm,
7569 .leave_smm = emulator_leave_smm,
7570 .triple_fault = emulator_triple_fault,
7571 .set_xcr = emulator_set_xcr,
7572};
7573
7574static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
7575{
7576 u32 int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);

	/*
	 * An "sti; sti" sequence only shadows interrupts for the first
	 * instruction, so if the previous instruction left the shadow bit
	 * set, it must have been an STI and the shadow should not be
	 * re-armed for the new instruction.
	 */
7584 if (int_shadow & mask)
7585 mask = 0;
7586 if (unlikely(int_shadow || mask)) {
7587 static_call(kvm_x86_set_interrupt_shadow)(vcpu, mask);
7588 if (!mask)
7589 kvm_make_request(KVM_REQ_EVENT, vcpu);
7590 }
7591}
7592
7593static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
7594{
	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;

	if (ctxt->exception.vector == PF_VECTOR)
7597 return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
7598
7599 if (ctxt->exception.error_code_valid)
7600 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
7601 ctxt->exception.error_code);
7602 else
7603 kvm_queue_exception(vcpu, ctxt->exception.vector);
7604 return false;
7605}
7606
7607static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
7608{
7609 struct x86_emulate_ctxt *ctxt;
7610
7611 ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
7612 if (!ctxt) {
7613 pr_err("kvm: failed to allocate vcpu's emulator\n");
7614 return NULL;
7615 }
7616
7617 ctxt->vcpu = vcpu;
7618 ctxt->ops = &emulate_ops;
7619 vcpu->arch.emulate_ctxt = ctxt;
7620
7621 return ctxt;
7622}
7623
7624static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
7625{
7626 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7627 int cs_db, cs_l;
7628
7629 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
7630
7631 ctxt->gpa_available = false;
7632 ctxt->eflags = kvm_get_rflags(vcpu);
7633 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
7634
7635 ctxt->eip = kvm_rip_read(vcpu);
7636 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
7637 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
7638 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
7639 cs_db ? X86EMUL_MODE_PROT32 :
7640 X86EMUL_MODE_PROT16;
7641 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
7642 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
7643 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
7644
7645 ctxt->interruptibility = 0;
7646 ctxt->have_exception = false;
7647 ctxt->exception.vector = -1;
7648 ctxt->perm_ok = false;
7649
7650 init_decode_cache(ctxt);
7651 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
7652}
7653
7654void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
7655{
7656 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7657 int ret;
7658
7659 init_emulate_ctxt(vcpu);
7660
7661 ctxt->op_bytes = 2;
7662 ctxt->ad_bytes = 2;
7663 ctxt->_eip = ctxt->eip + inc_eip;
7664 ret = emulate_int_real(ctxt, irq);
7665
7666 if (ret != X86EMUL_CONTINUE) {
7667 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
7668 } else {
7669 ctxt->eip = ctxt->_eip;
7670 kvm_rip_write(vcpu, ctxt->eip);
7671 kvm_set_rflags(vcpu, ctxt->eflags);
7672 }
7673}
7674EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
7675
7676static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
7677 u8 ndata, u8 *insn_bytes, u8 insn_size)
7678{
7679 struct kvm_run *run = vcpu->run;
7680 u64 info[5];
7681 u8 info_start;
7682
	/*
	 * Zero the whole array used to retrieve the exit info, as casting to
	 * u32 for select entries will leave some chunks uninitialized.
	 */
7687 memset(&info, 0, sizeof(info));
7688
7689 static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
7690 &info[2], (u32 *)&info[3],
7691 (u32 *)&info[4]);
7692
7693 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7694 run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
7695
	/*
	 * Cap the caller-provided data at 4 entries; the flags and exit info
	 * written below already take up part of the emulation_failure
	 * payload.
	 */
7701 if (WARN_ON_ONCE(ndata > 4))
7702 ndata = 4;
7703
	/* Always include the flags as a 'data' entry. */
7705 info_start = 1;
7706 run->emulation_failure.flags = 0;
7707
7708 if (insn_size) {
7709 BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) +
7710 sizeof(run->emulation_failure.insn_bytes) != 16));
7711 info_start += 2;
7712 run->emulation_failure.flags |=
7713 KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
7714 run->emulation_failure.insn_size = insn_size;
7715 memset(run->emulation_failure.insn_bytes, 0x90,
7716 sizeof(run->emulation_failure.insn_bytes));
7717 memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
7718 }
7719
7720 memcpy(&run->internal.data[info_start], info, sizeof(info));
7721 memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data,
7722 ndata * sizeof(data[0]));
7723
7724 run->emulation_failure.ndata = info_start + ARRAY_SIZE(info) + ndata;
7725}
7726
7727static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
7728{
7729 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7730
7731 prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
7732 ctxt->fetch.end - ctxt->fetch.data);
7733}
7734
7735void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
7736 u8 ndata)
7737{
7738 prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
7739}
7740EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
7741
7742void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
7743{
7744 __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
7745}
7746EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
7747
7748static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
7749{
7750 struct kvm *kvm = vcpu->kvm;
7751
7752 ++vcpu->stat.insn_emulation_fail;
7753 trace_kvm_emulate_insn_failed(vcpu);
7754
7755 if (emulation_type & EMULTYPE_VMWARE_GP) {
7756 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
7757 return 1;
7758 }
7759
7760 if (kvm->arch.exit_on_emulation_error ||
7761 (emulation_type & EMULTYPE_SKIP)) {
7762 prepare_emulation_ctxt_failure_exit(vcpu);
7763 return 0;
7764 }
7765
7766 kvm_queue_exception(vcpu, UD_VECTOR);
7767
7768 if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
7769 prepare_emulation_ctxt_failure_exit(vcpu);
7770 return 0;
7771 }
7772
7773 return 1;
7774}
7775
7776static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
7777 bool write_fault_to_shadow_pgtable,
7778 int emulation_type)
7779{
7780 gpa_t gpa = cr2_or_gpa;
7781 kvm_pfn_t pfn;
7782
7783 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7784 return false;
7785
7786 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7787 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7788 return false;
7789
7790 if (!vcpu->arch.mmu->direct_map) {
		/*
		 * Translate with write permission, since only write accesses
		 * need to be emulated here.
		 */
7795 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);

		/*
		 * If the mapping is invalid in the guest, let the CPU retry
		 * the access so it generates the fault itself.
		 */
7801 if (gpa == UNMAPPED_GVA)
7802 return true;
7803 }
7804
	/*
	 * Look up the backing pfn before deciding to retry.  If the GPA is
	 * not backed by a usable memslot, zapping shadow pages cannot fix the
	 * fault and the failure must be reported instead of retried.
	 */
7811 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
7812
	/*
	 * If the instruction faulted on an error pfn it cannot be fixed;
	 * report the failure to userspace.
	 */
7817 if (is_error_noslot_pfn(pfn))
7818 return false;
7819
7820 kvm_release_pfn_clean(pfn);
7821
	/* Instructions are well-emulated on a direct (TDP) MMU. */
7823 if (vcpu->arch.mmu->direct_map) {
7824 unsigned int indirect_shadow_pages;
7825
7826 write_lock(&vcpu->kvm->mmu_lock);
7827 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
7828 write_unlock(&vcpu->kvm->mmu_lock);
7829
7830 if (indirect_shadow_pages)
7831 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7832
7833 return true;
7834 }

	/*
	 * If emulation was due to an access to a shadowed page table, try to
	 * unprotect the page and re-enter the guest to let the CPU execute
	 * the instruction directly.
	 */
7841 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));

	/*
	 * If the access faults on its own page table, it cannot be fixed by
	 * unprotecting the shadow page and should be reported to userspace.
	 */
7848 return !write_fault_to_shadow_pgtable;
7849}
7850
7851static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
7852 gpa_t cr2_or_gpa, int emulation_type)
7853{
7854 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7855 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
7856
7857 last_retry_eip = vcpu->arch.last_retry_eip;
7858 last_retry_addr = vcpu->arch.last_retry_addr;
7859
	/*
	 * If the emulation was caused by #PF on a non-page-table-writing
	 * instruction, the vmexit came from shadow page protection; the
	 * shadow page can be zapped and the instruction retried directly.
	 *
	 * Note: if the guest uses a non-page-table-modifying instruction on
	 * the PDE that points to the instruction, unmapping it could lead to
	 * an infinite loop.  Cache the last retried eip and fault address and
	 * bail out if the same pair is seen again.
	 */
7873 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
7874
7875 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7876 return false;
7877
7878 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7879 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7880 return false;
7881
7882 if (x86_page_table_writing_insn(ctxt))
7883 return false;
7884
7885 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
7886 return false;
7887
7888 vcpu->arch.last_retry_eip = ctxt->eip;
7889 vcpu->arch.last_retry_addr = cr2_or_gpa;
7890
7891 if (!vcpu->arch.mmu->direct_map)
7892 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
7893
7894 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7895
7896 return true;
7897}
7898
7899static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
7900static int complete_emulated_pio(struct kvm_vcpu *vcpu);
7901
7902static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
7903{
7904 trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
7905
7906 if (entering_smm) {
7907 vcpu->arch.hflags |= HF_SMM_MASK;
7908 } else {
7909 vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
7910
		/* Process a latched INIT or SMI, if any.  */
7912 kvm_make_request(KVM_REQ_EVENT, vcpu);
7913
		/*
		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
		 * on SMM exit we still need to reload them from
		 * guest memory.
		 */
7919 vcpu->arch.pdptrs_from_userspace = false;
7920 }
7921
7922 kvm_mmu_reset_context(vcpu);
7923}
7924
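/*
 * Check 'addr' against the four hardware breakpoints described by dr7/db[]
 * and return the DR6 bits for every enabled breakpoint of the given type that
 * matches.
 */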
7925static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
7926 unsigned long *db)
7927{
7928 u32 dr6 = 0;
7929 int i;
7930 u32 enable, rwlen;
7931
7932 enable = dr7;
7933 rwlen = dr7 >> 16;
7934 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
7935 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
7936 dr6 |= (1 << i);
7937 return dr6;
7938}
7939
7940static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
7941{
7942 struct kvm_run *kvm_run = vcpu->run;
7943
7944 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
7945 kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW;
7946 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
7947 kvm_run->debug.arch.exception = DB_VECTOR;
7948 kvm_run->exit_reason = KVM_EXIT_DEBUG;
7949 return 0;
7950 }
7951 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
7952 return 1;
7953}
7954
7955int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
7956{
7957 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
7958 int r;
7959
7960 r = static_call(kvm_x86_skip_emulated_instruction)(vcpu);
7961 if (unlikely(!r))
7962 return 0;
7963
	/*
	 * rflags is the old, "raw" value of the flags.  The new value has
	 * not been saved yet.
	 *
	 * This is correct even for TF set by the guest, because "the
	 * processor will not generate this exception after the instruction
	 * that sets the TF flag".
	 */
7972 if (unlikely(rflags & X86_EFLAGS_TF))
7973 r = kvm_vcpu_do_singlestep(vcpu);
7974 return r;
7975}
7976EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
7977
7978static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
7979{
7980 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
7981 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
7982 struct kvm_run *kvm_run = vcpu->run;
7983 unsigned long eip = kvm_get_linear_rip(vcpu);
7984 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
7985 vcpu->arch.guest_debug_dr7,
7986 vcpu->arch.eff_db);
7987
7988 if (dr6 != 0) {
7989 kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
7990 kvm_run->debug.arch.pc = eip;
7991 kvm_run->debug.arch.exception = DB_VECTOR;
7992 kvm_run->exit_reason = KVM_EXIT_DEBUG;
7993 *r = 0;
7994 return true;
7995 }
7996 }
7997
7998 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
7999 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
8000 unsigned long eip = kvm_get_linear_rip(vcpu);
8001 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
8002 vcpu->arch.dr7,
8003 vcpu->arch.db);
8004
8005 if (dr6 != 0) {
8006 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
8007 *r = 1;
8008 return true;
8009 }
8010 }
8011
8012 return false;
8013}
8014
8015static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
8016{
8017 switch (ctxt->opcode_len) {
8018 case 1:
8019 switch (ctxt->b) {
8020 case 0xe4:
8021 case 0xe5:
8022 case 0xec:
8023 case 0xed:
8024 case 0xe6:
8025 case 0xe7:
8026 case 0xee:
8027 case 0xef:
8028 case 0x6c:
8029 case 0x6d:
8030 case 0x6e:
8031 case 0x6f:
8032 return true;
8033 }
8034 break;
8035 case 2:
8036 switch (ctxt->b) {
8037 case 0x33:
8038 return true;
8039 }
8040 break;
8041 }
8042
8043 return false;
8044}
8045
/*
 * Decode the to-be-emulated instruction.  Return EMULATION_OK on success.
 */
8049int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
8050 void *insn, int insn_len)
8051{
8052 int r = EMULATION_OK;
8053 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8054
8055 init_emulate_ctxt(vcpu);
8056
	/*
	 * Check for code breakpoints up front; if RIP hits an armed
	 * breakpoint, the #DB (or KVM_EXIT_DEBUG) must be delivered instead
	 * of decoding and emulating the instruction.
	 */
8062 if (!(emulation_type & EMULTYPE_SKIP) &&
8063 kvm_vcpu_check_breakpoint(vcpu, &r))
8064 return r;
8065
8066 r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
8067
8068 trace_kvm_emulate_insn_start(vcpu);
8069 ++vcpu->stat.insn_emulation;
8070
8071 return r;
8072}
8073EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
8074
8075int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
8076 int emulation_type, void *insn, int insn_len)
8077{
8078 int r;
8079 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8080 bool writeback = true;
8081 bool write_fault_to_spt;
8082
8083 if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
8084 return 1;
8085
8086 vcpu->arch.l1tf_flush_l1d = true;
8087
	/*
	 * Clear write_fault_to_shadow_pgtable here to ensure it is
	 * never reused.
	 */
8092 write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
8093 vcpu->arch.write_fault_to_shadow_pgtable = false;
8094
8095 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
8096 kvm_clear_exception_queue(vcpu);
8097
8098 r = x86_decode_emulated_instruction(vcpu, emulation_type,
8099 insn, insn_len);
8100 if (r != EMULATION_OK) {
8101 if ((emulation_type & EMULTYPE_TRAP_UD) ||
8102 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
8103 kvm_queue_exception(vcpu, UD_VECTOR);
8104 return 1;
8105 }
8106 if (reexecute_instruction(vcpu, cr2_or_gpa,
8107 write_fault_to_spt,
8108 emulation_type))
8109 return 1;
8110 if (ctxt->have_exception) {
			/*
			 * #UD should result in just EMULATION_FAILED, and
			 * trap-like exceptions should not be encountered
			 * during decode.
			 */
8115 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
8116 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
8117 inject_emulated_exception(vcpu);
8118 return 1;
8119 }
8120 return handle_emulation_failure(vcpu, emulation_type);
8121 }
8122 }
8123
8124 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
8125 !is_vmware_backdoor_opcode(ctxt)) {
8126 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8127 return 1;
8128 }
8129
	/*
	 * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
	 * for kvm_skip_emulated_instruction().  The caller is responsible for
	 * updating interruptibility state and injecting single-step #DBs.
	 */
8135 if (emulation_type & EMULTYPE_SKIP) {
8136 kvm_rip_write(vcpu, ctxt->_eip);
8137 if (ctxt->eflags & X86_EFLAGS_RF)
8138 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
8139 return 1;
8140 }
8141
8142 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
8143 return 1;
8144
	/*
	 * This is needed for the VMware backdoor interface to work, since it
	 * changes register values during an I/O operation.
	 */
8147 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
8148 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
8149 emulator_invalidate_register_cache(ctxt);
8150 }
8151
8152restart:
8153 if (emulation_type & EMULTYPE_PF) {
		/* Save the faulting GPA (cr2) in the address field */
8155 ctxt->exception.address = cr2_or_gpa;
8156
		/* With shadow page tables, cr2 contains a GVA or nGPA. */
8158 if (vcpu->arch.mmu->direct_map) {
8159 ctxt->gpa_available = true;
8160 ctxt->gpa_val = cr2_or_gpa;
8161 }
8162 } else {
		/* Sanitize the address out of an abundance of paranoia. */
8164 ctxt->exception.address = 0;
8165 }
8166
8167 r = x86_emulate_insn(ctxt);
8168
8169 if (r == EMULATION_INTERCEPTED)
8170 return 1;
8171
8172 if (r == EMULATION_FAILED) {
8173 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
8174 emulation_type))
8175 return 1;
8176
8177 return handle_emulation_failure(vcpu, emulation_type);
8178 }
8179
8180 if (ctxt->have_exception) {
8181 r = 1;
8182 if (inject_emulated_exception(vcpu))
8183 return r;
8184 } else if (vcpu->arch.pio.count) {
8185 if (!vcpu->arch.pio.in) {
			/* FIXME: return into the emulator if single-stepping.  */
8187 vcpu->arch.pio.count = 0;
8188 } else {
8189 writeback = false;
8190 vcpu->arch.complete_userspace_io = complete_emulated_pio;
8191 }
8192 r = 0;
8193 } else if (vcpu->mmio_needed) {
8194 ++vcpu->stat.mmio_exits;
8195
8196 if (!vcpu->mmio_is_write)
8197 writeback = false;
8198 r = 0;
8199 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8200 } else if (r == EMULATION_RESTART)
8201 goto restart;
8202 else
8203 r = 1;
8204
8205 if (writeback) {
8206 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
8207 toggle_interruptibility(vcpu, ctxt->interruptibility);
8208 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8209 if (!ctxt->have_exception ||
8210 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
8211 kvm_rip_write(vcpu, ctxt->eip);
8212 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
8213 r = kvm_vcpu_do_singlestep(vcpu);
8214 if (kvm_x86_ops.update_emulated_instruction)
8215 static_call(kvm_x86_update_emulated_instruction)(vcpu);
8216 __kvm_set_rflags(vcpu, ctxt->eflags);
8217 }

		/*
		 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
		 * do nothing, and it will be requested again as soon as
		 * the shadow expires.  But we still need to check here,
		 * because POPF has no interrupt shadow.
		 */
8225 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
8226 kvm_make_request(KVM_REQ_EVENT, vcpu);
8227 } else
8228 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
8229
8230 return r;
8231}
8232
8233int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
8234{
8235 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
8236}
8237EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
8238
8239int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
8240 void *insn, int insn_len)
8241{
8242 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
8243}
8244EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
8245
8246static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
8247{
8248 vcpu->arch.pio.count = 0;
8249 return 1;
8250}
8251
8252static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
8253{
8254 vcpu->arch.pio.count = 0;
8255
8256 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
8257 return 1;
8258
8259 return kvm_skip_emulated_instruction(vcpu);
8260}
8261
8262static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
8263 unsigned short port)
8264{
8265 unsigned long val = kvm_rax_read(vcpu);
8266 int ret = emulator_pio_out(vcpu, size, port, &val, 1);
8267
8268 if (ret)
8269 return ret;
8270
	/*
	 * Workaround userspace that relies on old KVM behavior of %rip being
	 * incremented prior to exiting to userspace to handle "OUT 0x7e".
	 */
8275 if (port == 0x7e &&
8276 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
8277 vcpu->arch.complete_userspace_io =
8278 complete_fast_pio_out_port_0x7e;
8279 kvm_skip_emulated_instruction(vcpu);
8280 } else {
8281 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8282 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
8283 }
8284 return 0;
8285}
8286
8287static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
8288{
8289 unsigned long val;
8290
	/* We should only ever be called with arch.pio.count equal to 1 */
8292 BUG_ON(vcpu->arch.pio.count != 1);
8293
8294 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
8295 vcpu->arch.pio.count = 0;
8296 return 1;
8297 }
8298
	/* For size less than 4 we merge, else we zero extend */
8300 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
8301
	/*
	 * Since vcpu->arch.pio.count == 1, let emulator_pio_in() perform
	 * the copy and tracing.
	 */
8306 emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
8307 kvm_rax_write(vcpu, val);
8308
8309 return kvm_skip_emulated_instruction(vcpu);
8310}
8311
8312static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
8313 unsigned short port)
8314{
8315 unsigned long val;
8316 int ret;
8317
	/* For size less than 4 we merge, else we zero extend */
8319 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
8320
8321 ret = emulator_pio_in(vcpu, size, port, &val, 1);
8322 if (ret) {
8323 kvm_rax_write(vcpu, val);
8324 return ret;
8325 }
8326
8327 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8328 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
8329
8330 return 0;
8331}
8332
8333int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
8334{
8335 int ret;
8336
8337 if (in)
8338 ret = kvm_fast_pio_in(vcpu, size, port);
8339 else
8340 ret = kvm_fast_pio_out(vcpu, size, port);
8341 return ret && kvm_skip_emulated_instruction(vcpu);
8342}
8343EXPORT_SYMBOL_GPL(kvm_fast_pio);
8344
8345static int kvmclock_cpu_down_prep(unsigned int cpu)
8346{
8347 __this_cpu_write(cpu_tsc_khz, 0);
8348 return 0;
8349}
8350
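/*
 * Refresh this CPU's cached TSC frequency: prefer the value from a cpufreq
 * transition, then cpufreq's current reading (on non-constant-TSC hosts), and
 * finally the boot-time tsc_khz.
 */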
8351static void tsc_khz_changed(void *data)
8352{
8353 struct cpufreq_freqs *freq = data;
8354 unsigned long khz = 0;
8355
8356 if (data)
8357 khz = freq->new;
8358 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8359 khz = cpufreq_quick_get(raw_smp_processor_id());
8360 if (!khz)
8361 khz = tsc_khz;
8362 __this_cpu_write(cpu_tsc_khz, khz);
8363}
8364
8365#ifdef CONFIG_X86_64
8366static void kvm_hyperv_tsc_notifier(void)
8367{
8368 struct kvm *kvm;
8369 int cpu;
8370
8371 mutex_lock(&kvm_lock);
8372 list_for_each_entry(kvm, &vm_list, vm_list)
8373 kvm_make_mclock_inprogress_request(kvm);
8374
	/* no guest entries from this point */
8376 hyperv_stop_tsc_emulation();
8377
	/* TSC frequency always matches when on Hyper-V */
8379 for_each_present_cpu(cpu)
8380 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
8381 kvm_max_guest_tsc_khz = tsc_khz;
8382
8383 list_for_each_entry(kvm, &vm_list, vm_list) {
8384 __kvm_start_pvclock_update(kvm);
8385 pvclock_update_vm_gtod_copy(kvm);
8386 kvm_end_pvclock_update(kvm);
8387 }
8388
8389 mutex_unlock(&kvm_lock);
8390}
8391#endif
8392
8393static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
8394{
8395 struct kvm *kvm;
8396 struct kvm_vcpu *vcpu;
8397 int i, send_ipi = 0;
8398
	/*
	 * Update the cached TSC frequency on the affected CPU and request a
	 * clock update for every vCPU loaded on it.  If the frequency was
	 * raised while one of those vCPUs may be running guest code, the IPI
	 * below is repeated to force a VM exit, so the guest cannot keep
	 * using kvmclock parameters that were computed for the old, slower
	 * frequency.
	 */
8438 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
8439
8440 mutex_lock(&kvm_lock);
8441 list_for_each_entry(kvm, &vm_list, vm_list) {
8442 kvm_for_each_vcpu(i, vcpu, kvm) {
8443 if (vcpu->cpu != cpu)
8444 continue;
8445 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8446 if (vcpu->cpu != raw_smp_processor_id())
8447 send_ipi = 1;
8448 }
8449 }
8450 mutex_unlock(&kvm_lock);
8451
8452 if (freq->old < freq->new && send_ipi) {
		/*
		 * The frequency was raised; a vCPU on this CPU may still be
		 * running with kvmclock parameters computed for the old
		 * frequency, which would make guest time appear to jump.
		 * Send the IPI again to kick the CPU out of guest context so
		 * the pending clock update is handled first.
		 */
8465 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
8466 }
8467}
8468
8469static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
8470 void *data)
8471{
8472 struct cpufreq_freqs *freq = data;
8473 int cpu;
8474
8475 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
8476 return 0;
8477 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
8478 return 0;
8479
8480 for_each_cpu(cpu, freq->policy->cpus)
8481 __kvmclock_cpufreq_notifier(freq, cpu);
8482
8483 return 0;
8484}
8485
8486static struct notifier_block kvmclock_cpufreq_notifier_block = {
8487 .notifier_call = kvmclock_cpufreq_notifier
8488};
8489
8490static int kvmclock_cpu_online(unsigned int cpu)
8491{
8492 tsc_khz_changed(NULL);
8493 return 0;
8494}
8495
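/*
 * Set up the machinery that keeps cpu_tsc_khz current: a cpufreq transition
 * notifier on hosts without a constant TSC, plus a CPU hotplug callback for
 * newly onlined CPUs.
 */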
8496static void kvm_timer_init(void)
8497{
8498 max_tsc_khz = tsc_khz;
8499
8500 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
8501#ifdef CONFIG_CPU_FREQ
8502 struct cpufreq_policy *policy;
8503 int cpu;
8504
8505 cpu = get_cpu();
8506 policy = cpufreq_cpu_get(cpu);
8507 if (policy) {
8508 if (policy->cpuinfo.max_freq)
8509 max_tsc_khz = policy->cpuinfo.max_freq;
8510 cpufreq_cpu_put(policy);
8511 }
8512 put_cpu();
8513#endif
8514 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
8515 CPUFREQ_TRANSITION_NOTIFIER);
8516 }
8517
8518 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
8519 kvmclock_cpu_online, kvmclock_cpu_down_prep);
8520}
8521
8522DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
8523EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
8524
8525int kvm_is_in_guest(void)
8526{
8527 return __this_cpu_read(current_vcpu) != NULL;
8528}
8529
8530static int kvm_is_user_mode(void)
8531{
8532 int user_mode = 3;
8533
8534 if (__this_cpu_read(current_vcpu))
8535 user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
8536
8537 return user_mode != 0;
8538}
8539
8540static unsigned long kvm_get_guest_ip(void)
8541{
8542 unsigned long ip = 0;
8543
8544 if (__this_cpu_read(current_vcpu))
8545 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
8546
8547 return ip;
8548}
8549
8550static void kvm_handle_intel_pt_intr(void)
8551{
8552 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
8553
8554 kvm_make_request(KVM_REQ_PMI, vcpu);
8555 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
8556 (unsigned long *)&vcpu->arch.pmu.global_status);
8557}
8558
8559static struct perf_guest_info_callbacks kvm_guest_cbs = {
8560 .is_in_guest = kvm_is_in_guest,
8561 .is_user_mode = kvm_is_user_mode,
8562 .get_guest_ip = kvm_get_guest_ip,
8563 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
8564};
8565
8566#ifdef CONFIG_X86_64
8567static void pvclock_gtod_update_fn(struct work_struct *work)
8568{
8569 struct kvm *kvm;
8571 struct kvm_vcpu *vcpu;
8572 int i;
8573
8574 mutex_lock(&kvm_lock);
8575 list_for_each_entry(kvm, &vm_list, vm_list)
8576 kvm_for_each_vcpu(i, vcpu, kvm)
8577 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
8578 atomic_set(&kvm_guest_has_master_clock, 0);
8579 mutex_unlock(&kvm_lock);
8580}
8581
8582static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
8583
/*
 * Indirection to move queue_work() out of the tk_core.seq write held
 * region to prevent possible deadlocks against time accessors which
 * are invoked with work related locks held.
 */
8589static void pvclock_irq_work_fn(struct irq_work *w)
8590{
8591 queue_work(system_long_wq, &pvclock_gtod_work);
8592}
8593
8594static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
8595
/*
 * Notification about pvclock gtod data update.
 */
8599static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
8600 void *priv)
8601{
8602 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
8603 struct timekeeper *tk = priv;
8604
8605 update_pvclock_gtod(tk);
8606
	/*
	 * Disable the master clock if the host does not trust, or does not
	 * use, a TSC based clocksource.  Delegate queue_work() to irq_work
	 * as this is invoked with tk_core.seq write held.
	 */
8612 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
8613 atomic_read(&kvm_guest_has_master_clock) != 0)
8614 irq_work_queue(&pvclock_irq_work);
8615 return 0;
8616}
8617
8618static struct notifier_block pvclock_gtod_notifier = {
8619 .notifier_call = pvclock_gtod_notify,
8620};
8621#endif
8622
8623int kvm_arch_init(void *opaque)
8624{
8625 struct kvm_x86_init_ops *ops = opaque;
8626 int r;
8627
8628 if (kvm_x86_ops.hardware_enable) {
8629 pr_err("kvm: already loaded vendor module '%s'\n", kvm_x86_ops.name);
8630 r = -EEXIST;
8631 goto out;
8632 }
8633
8634 if (!ops->cpu_has_kvm_support()) {
8635 pr_err_ratelimited("kvm: no hardware support for '%s'\n",
8636 ops->runtime_ops->name);
8637 r = -EOPNOTSUPP;
8638 goto out;
8639 }
8640 if (ops->disabled_by_bios()) {
8641 pr_err_ratelimited("kvm: support for '%s' disabled by bios\n",
8642 ops->runtime_ops->name);
8643 r = -EOPNOTSUPP;
8644 goto out;
8645 }
8646
	/*
	 * KVM assumes the guest has an FPU and FXSAVE/FXRSTOR support; the
	 * FPU state interfaces expect the FXSAVE layout, so refuse to load on
	 * hosts lacking those features.
	 */
8652 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
8653 printk(KERN_ERR "kvm: inadequate fpu\n");
8654 r = -EOPNOTSUPP;
8655 goto out;
8656 }
8657
8658 r = -ENOMEM;
8659
8660 x86_emulator_cache = kvm_alloc_emulator_cache();
8661 if (!x86_emulator_cache) {
8662 pr_err("kvm: failed to allocate cache for x86 emulator\n");
8663 goto out;
8664 }
8665
8666 user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
8667 if (!user_return_msrs) {
8668 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
8669 goto out_free_x86_emulator_cache;
8670 }
8671 kvm_nr_uret_msrs = 0;
8672
8673 r = kvm_mmu_module_init();
8674 if (r)
8675 goto out_free_percpu;
8676
8677 kvm_timer_init();
8678
8679 perf_register_guest_info_callbacks(&kvm_guest_cbs);
8680
8681 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
8682 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
8683 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
8684 }
8685
8686 if (pi_inject_timer == -1)
8687 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
8688#ifdef CONFIG_X86_64
8689 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
8690
8691 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
8692 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
8693#endif
8694
8695 return 0;
8696
8697out_free_percpu:
8698 free_percpu(user_return_msrs);
8699out_free_x86_emulator_cache:
8700 kmem_cache_destroy(x86_emulator_cache);
8701out:
8702 return r;
8703}
8704
8705void kvm_arch_exit(void)
8706{
8707#ifdef CONFIG_X86_64
8708 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
8709 clear_hv_tscchange_cb();
8710#endif
8711 kvm_lapic_exit();
8712 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
8713
8714 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8715 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
8716 CPUFREQ_TRANSITION_NOTIFIER);
8717 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
8718#ifdef CONFIG_X86_64
8719 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
8720 irq_work_sync(&pvclock_irq_work);
8721 cancel_work_sync(&pvclock_gtod_work);
8722#endif
8723 kvm_x86_ops.hardware_enable = NULL;
8724 kvm_mmu_module_exit();
8725 free_percpu(user_return_msrs);
8726 kmem_cache_destroy(x86_emulator_cache);
8727#ifdef CONFIG_KVM_XEN
8728 static_key_deferred_flush(&kvm_xen_enabled);
8729 WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
8730#endif
8731}
8732
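/*
 * Put the vCPU into the given MP state when the local APIC is emulated in the
 * kernel; otherwise exit to userspace with the given reason so it can model
 * the halt itself.
 */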
8733static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
8734{
8735 ++vcpu->stat.halt_exits;
8736 if (lapic_in_kernel(vcpu)) {
8737 vcpu->arch.mp_state = state;
8738 return 1;
8739 } else {
8740 vcpu->run->exit_reason = reason;
8741 return 0;
8742 }
8743}
8744
8745int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
8746{
8747 return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
8748}
8749EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
8750
8751int kvm_emulate_halt(struct kvm_vcpu *vcpu)
8752{
8753 int ret = kvm_skip_emulated_instruction(vcpu);
	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
8758 return kvm_vcpu_halt(vcpu) && ret;
8759}
8760EXPORT_SYMBOL_GPL(kvm_emulate_halt);
8761
8762int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
8763{
8764 int ret = kvm_skip_emulated_instruction(vcpu);
8765
8766 return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
8767}
8768EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
8769
8770#ifdef CONFIG_X86_64
8771static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
8772 unsigned long clock_type)
8773{
8774 struct kvm_clock_pairing clock_pairing;
8775 struct timespec64 ts;
8776 u64 cycle;
8777 int ret;
8778
8779 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
8780 return -KVM_EOPNOTSUPP;
8781
8782 if (!kvm_get_walltime_and_clockread(&ts, &cycle))
8783 return -KVM_EOPNOTSUPP;
8784
8785 clock_pairing.sec = ts.tv_sec;
8786 clock_pairing.nsec = ts.tv_nsec;
8787 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
8788 clock_pairing.flags = 0;
8789 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
8790
8791 ret = 0;
8792 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
8793 sizeof(struct kvm_clock_pairing)))
8794 ret = -KVM_EFAULT;
8795
8796 return ret;
8797}
8798#endif
8799
/*
 * kvm_pv_kick_cpu_op: Kick a vcpu.
 *
 * @apicid - apicid of vcpu to be kicked.
 */
8805static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
8806{
8807 struct kvm_lapic_irq lapic_irq;
8808
8809 lapic_irq.shorthand = APIC_DEST_NOSHORT;
8810 lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
8811 lapic_irq.level = 0;
8812 lapic_irq.dest_id = apicid;
8813 lapic_irq.msi_redir_hint = false;
8814
8815 lapic_irq.delivery_mode = APIC_DM_REMRD;
8816 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
8817}
8818
8819bool kvm_apicv_activated(struct kvm *kvm)
8820{
8821 return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
8822}
8823EXPORT_SYMBOL_GPL(kvm_apicv_activated);
8824
8825static void kvm_apicv_init(struct kvm *kvm)
8826{
8827 init_rwsem(&kvm->arch.apicv_update_lock);
8828
8829 set_bit(APICV_INHIBIT_REASON_ABSENT,
8830 &kvm->arch.apicv_inhibit_reasons);
8831 if (!enable_apicv)
8832 set_bit(APICV_INHIBIT_REASON_DISABLE,
8833 &kvm->arch.apicv_inhibit_reasons);
8834}
8835
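/*
 * Best-effort directed yield to the vCPU that owns the destination APIC ID;
 * silently gives up if the target cannot be resolved, is not ready, or is the
 * yielding vCPU itself.
 */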
8836static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
8837{
8838 struct kvm_vcpu *target = NULL;
8839 struct kvm_apic_map *map;
8840
8841 vcpu->stat.directed_yield_attempted++;
8842
8843 if (single_task_running())
8844 goto no_yield;
8845
8846 rcu_read_lock();
8847 map = rcu_dereference(vcpu->kvm->arch.apic_map);
8848
8849 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
8850 target = map->phys_map[dest_id]->vcpu;
8851
8852 rcu_read_unlock();
8853
8854 if (!target || !READ_ONCE(target->ready))
8855 goto no_yield;
8856
8857
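	/* Ignore requests to yield to self. */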
8858 if (vcpu == target)
8859 goto no_yield;
8860
8861 if (kvm_vcpu_yield_to(target) <= 0)
8862 goto no_yield;
8863
8864 vcpu->stat.directed_yield_successful++;
8865
8866no_yield:
8867 return;
8868}
8869
8870static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
8871{
8872 u64 ret = vcpu->run->hypercall.ret;
8873
8874 if (!is_64_bit_mode(vcpu))
8875 ret = (u32)ret;
8876 kvm_rax_write(vcpu, ret);
8877 ++vcpu->stat.hypercalls;
8878 return kvm_skip_emulated_instruction(vcpu);
8879}
8880
8881int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
8882{
8883 unsigned long nr, a0, a1, a2, a3, ret;
8884 int op_64_bit;
8885
8886 if (kvm_xen_hypercall_enabled(vcpu->kvm))
8887 return kvm_xen_hypercall(vcpu);
8888
8889 if (kvm_hv_hypercall_enabled(vcpu))
8890 return kvm_hv_hypercall(vcpu);
8891
8892 nr = kvm_rax_read(vcpu);
8893 a0 = kvm_rbx_read(vcpu);
8894 a1 = kvm_rcx_read(vcpu);
8895 a2 = kvm_rdx_read(vcpu);
8896 a3 = kvm_rsi_read(vcpu);
8897
8898 trace_kvm_hypercall(nr, a0, a1, a2, a3);
8899
8900 op_64_bit = is_64_bit_hypercall(vcpu);
8901 if (!op_64_bit) {
8902 nr &= 0xFFFFFFFF;
8903 a0 &= 0xFFFFFFFF;
8904 a1 &= 0xFFFFFFFF;
8905 a2 &= 0xFFFFFFFF;
8906 a3 &= 0xFFFFFFFF;
8907 }
8908
8909 if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
8910 ret = -KVM_EPERM;
8911 goto out;
8912 }
8913
8914 ret = -KVM_ENOSYS;
8915
8916 switch (nr) {
8917 case KVM_HC_VAPIC_POLL_IRQ:
8918 ret = 0;
8919 break;
8920 case KVM_HC_KICK_CPU:
8921 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
8922 break;
8923
8924 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
8925 kvm_sched_yield(vcpu, a1);
8926 ret = 0;
8927 break;
8928#ifdef CONFIG_X86_64
8929 case KVM_HC_CLOCK_PAIRING:
8930 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
8931 break;
8932#endif
8933 case KVM_HC_SEND_IPI:
8934 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
8935 break;
8936
8937 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
8938 break;
8939 case KVM_HC_SCHED_YIELD:
8940 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
8941 break;
8942
8943 kvm_sched_yield(vcpu, a0);
8944 ret = 0;
8945 break;
8946 case KVM_HC_MAP_GPA_RANGE: {
8947 u64 gpa = a0, npages = a1, attrs = a2;
8948
8949 ret = -KVM_ENOSYS;
8950 if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
8951 break;
8952
8953 if (!PAGE_ALIGNED(gpa) || !npages ||
8954 gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
8955 ret = -KVM_EINVAL;
8956 break;
8957 }
8958
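		/*
		 * Arguments look sane; hand the request off to userspace,
		 * which performs the actual mapping change.  The hypercall
		 * return value is written back by complete_hypercall_exit()
		 * once userspace re-enters the vCPU.
		 */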
8959 vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
8960 vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
8961 vcpu->run->hypercall.args[0] = gpa;
8962 vcpu->run->hypercall.args[1] = npages;
8963 vcpu->run->hypercall.args[2] = attrs;
8964 vcpu->run->hypercall.longmode = op_64_bit;
8965 vcpu->arch.complete_userspace_io = complete_hypercall_exit;
8966 return 0;
8967 }
8968 default:
8969 ret = -KVM_ENOSYS;
8970 break;
8971 }
8972out:
8973 if (!op_64_bit)
8974 ret = (u32)ret;
8975 kvm_rax_write(vcpu, ret);
8976
8977 ++vcpu->stat.hypercalls;
8978 return kvm_skip_emulated_instruction(vcpu);
8979}
8980EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
8981
8982static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
8983{
8984 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
8985 char instruction[3];
8986 unsigned long rip = kvm_rip_read(vcpu);
8987
8988 static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
8989
8990 return emulator_write_emulated(ctxt, rip, instruction, 3,
8991 &ctxt->exception);
8992}
8993
8994static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
8995{
8996 return vcpu->run->request_interrupt_window &&
8997 likely(!pic_in_kernel(vcpu->kvm));
8998}
8999
9000static void post_kvm_run_save(struct kvm_vcpu *vcpu)
9001{
9002 struct kvm_run *kvm_run = vcpu->run;
9003
9004 kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
9005 kvm_run->cr8 = kvm_get_cr8(vcpu);
9006 kvm_run->apic_base = kvm_get_apic_base(vcpu);
9007
9008
9009
9010
9011
9012
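	/*
	 * kvm_vcpu_ready_for_interrupt_injection() can end up reading Xen
	 * shared-info state that is protected by kvm->srcu, so hold the SRCU
	 * read lock around the check.
	 */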
9013 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9014 kvm_run->ready_for_interrupt_injection =
9015 pic_in_kernel(vcpu->kvm) ||
9016 kvm_vcpu_ready_for_interrupt_injection(vcpu);
9017 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
9018
9019 if (is_smm(vcpu))
9020 kvm_run->flags |= KVM_RUN_X86_SMM;
9021}
9022
9023static void update_cr8_intercept(struct kvm_vcpu *vcpu)
9024{
9025 int max_irr, tpr;
9026
9027 if (!kvm_x86_ops.update_cr8_intercept)
9028 return;
9029
9030 if (!lapic_in_kernel(vcpu))
9031 return;
9032
9033 if (vcpu->arch.apicv_active)
9034 return;
9035
9036 if (!vcpu->arch.apic->vapic_addr)
9037 max_irr = kvm_lapic_find_highest_irr(vcpu);
9038 else
9039 max_irr = -1;
9040
9041 if (max_irr != -1)
9042 max_irr >>= 4;
9043
9044 tpr = kvm_lapic_get_cr8(vcpu);
9045
9046 static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
9047}
9048
9049
9050int kvm_check_nested_events(struct kvm_vcpu *vcpu)
9051{
9052 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
9053 kvm_x86_ops.nested_ops->triple_fault(vcpu);
9054 return 1;
9055 }
9056
9057 return kvm_x86_ops.nested_ops->check_events(vcpu);
9058}
9059
9060static void kvm_inject_exception(struct kvm_vcpu *vcpu)
9061{
9062 if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
9063 		vcpu->arch.exception.has_error_code = false;
9064 static_call(kvm_x86_queue_exception)(vcpu);
9065}
9066
9067static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
9068{
9069 int r;
9070 bool can_inject = true;
9071
9072
9073
9074 if (vcpu->arch.exception.injected) {
9075 kvm_inject_exception(vcpu);
9076 can_inject = false;
9077 }
9078
9079
9080
9081
9082
9083
9084
9085
9086
9087
9088
9089
9090
9091
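	/*
	 * Otherwise, re-deliver an NMI or IRQ that was injected on a previous
	 * VM-Enter but whose delivery did not complete.  Re-injection is
	 * skipped while a new exception is pending; that exception is
	 * processed first, below.
	 */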
9092 else if (!vcpu->arch.exception.pending) {
9093 if (vcpu->arch.nmi_injected) {
9094 static_call(kvm_x86_set_nmi)(vcpu);
9095 can_inject = false;
9096 } else if (vcpu->arch.interrupt.injected) {
9097 static_call(kvm_x86_set_irq)(vcpu);
9098 can_inject = false;
9099 }
9100 }
9101
9102 WARN_ON_ONCE(vcpu->arch.exception.injected &&
9103 vcpu->arch.exception.pending);
9104
9105
9106
9107
9108
9109
9110
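	/*
	 * For a nested guest, let the nested hypervisor logic run first: a
	 * pending L1 event may demand a VM-Exit from L2 to L1 before anything
	 * new can be injected into L2.
	 */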
9111 if (is_guest_mode(vcpu)) {
9112 r = kvm_check_nested_events(vcpu);
9113 if (r < 0)
9114 goto out;
9115 }
9116
9117
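	/* Deliver a newly pending exception, if there is one. */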
9118 if (vcpu->arch.exception.pending) {
9119 trace_kvm_inj_exception(vcpu->arch.exception.nr,
9120 vcpu->arch.exception.has_error_code,
9121 vcpu->arch.exception.error_code);
9122
9123 vcpu->arch.exception.pending = false;
9124 vcpu->arch.exception.injected = true;
9125
9126 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
9127 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
9128 X86_EFLAGS_RF);
9129
9130 if (vcpu->arch.exception.nr == DB_VECTOR) {
9131 kvm_deliver_exception_payload(vcpu);
9132 if (vcpu->arch.dr7 & DR7_GD) {
9133 vcpu->arch.dr7 &= ~DR7_GD;
9134 kvm_update_dr7(vcpu);
9135 }
9136 }
9137
9138 kvm_inject_exception(vcpu);
9139 can_inject = false;
9140 }
9141
9142
9143 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
9144 return 0;
9145
9146
9147
9148
9149
9150
9151
9152
9153
9154
9155
9156
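	/*
	 * Now try to inject new events, in decreasing order of priority:
	 * SMI, NMI, then maskable IRQ.  -EBUSY (nothing can be injected right
	 * now) requests an immediate exit so the event is re-evaluated on the
	 * next entry; if the event class is merely blocked, ask vendor code
	 * to open the corresponding SMI/NMI/IRQ window instead.
	 */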
9157 if (vcpu->arch.smi_pending) {
9158 r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
9159 if (r < 0)
9160 goto out;
9161 if (r) {
9162 vcpu->arch.smi_pending = false;
9163 ++vcpu->arch.smi_count;
9164 enter_smm(vcpu);
9165 can_inject = false;
9166 } else
9167 static_call(kvm_x86_enable_smi_window)(vcpu);
9168 }
9169
9170 if (vcpu->arch.nmi_pending) {
9171 r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
9172 if (r < 0)
9173 goto out;
9174 if (r) {
9175 --vcpu->arch.nmi_pending;
9176 vcpu->arch.nmi_injected = true;
9177 static_call(kvm_x86_set_nmi)(vcpu);
9178 can_inject = false;
9179 WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
9180 }
9181 if (vcpu->arch.nmi_pending)
9182 static_call(kvm_x86_enable_nmi_window)(vcpu);
9183 }
9184
9185 if (kvm_cpu_has_injectable_intr(vcpu)) {
9186 r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
9187 if (r < 0)
9188 goto out;
9189 if (r) {
9190 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
9191 static_call(kvm_x86_set_irq)(vcpu);
9192 WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
9193 }
9194 if (kvm_cpu_has_injectable_intr(vcpu))
9195 static_call(kvm_x86_enable_irq_window)(vcpu);
9196 }
9197
9198 if (is_guest_mode(vcpu) &&
9199 kvm_x86_ops.nested_ops->hv_timer_pending &&
9200 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
9201 *req_immediate_exit = true;
9202
9203 WARN_ON(vcpu->arch.exception.pending);
9204 return 0;
9205
9206out:
9207 if (r == -EBUSY) {
9208 *req_immediate_exit = true;
9209 r = 0;
9210 }
9211 return r;
9212}
9213
9214static void process_nmi(struct kvm_vcpu *vcpu)
9215{
9216 unsigned limit = 2;
9217
9218
9219
9220
9221
9222
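	/*
	 * x86 allows at most one NMI to be latched while another is being
	 * handled.  If NMIs are currently masked or one is already queued for
	 * injection, accept only a single additional NMI; otherwise up to two
	 * can be pending (the first will be injected immediately).
	 */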
9223 if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
9224 limit = 1;
9225
9226 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
9227 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
9228 kvm_make_request(KVM_REQ_EVENT, vcpu);
9229}
9230
9231static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
9232{
9233 u32 flags = 0;
9234 flags |= seg->g << 23;
9235 flags |= seg->db << 22;
9236 flags |= seg->l << 21;
9237 flags |= seg->avl << 20;
9238 flags |= seg->present << 15;
9239 flags |= seg->dpl << 13;
9240 flags |= seg->s << 12;
9241 flags |= seg->type << 8;
9242 return flags;
9243}
9244
9245static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
9246{
9247 struct kvm_segment seg;
9248 int offset;
9249
9250 kvm_get_segment(vcpu, &seg, n);
9251 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
9252
9253 if (n < 3)
9254 offset = 0x7f84 + n * 12;
9255 else
9256 offset = 0x7f2c + (n - 3) * 12;
9257
9258 put_smstate(u32, buf, offset + 8, seg.base);
9259 put_smstate(u32, buf, offset + 4, seg.limit);
9260 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
9261}
9262
9263#ifdef CONFIG_X86_64
9264static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
9265{
9266 struct kvm_segment seg;
9267 int offset;
9268 u16 flags;
9269
9270 kvm_get_segment(vcpu, &seg, n);
9271 offset = 0x7e00 + n * 16;
9272
9273 flags = enter_smm_get_segment_flags(&seg) >> 8;
9274 put_smstate(u16, buf, offset, seg.selector);
9275 put_smstate(u16, buf, offset + 2, flags);
9276 put_smstate(u32, buf, offset + 4, seg.limit);
9277 put_smstate(u64, buf, offset + 8, seg.base);
9278}
9279#endif
9280
9281static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
9282{
9283 struct desc_ptr dt;
9284 struct kvm_segment seg;
9285 unsigned long val;
9286 int i;
9287
9288 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
9289 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
9290 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
9291 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
9292
9293 for (i = 0; i < 8; i++)
9294 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
9295
9296 kvm_get_dr(vcpu, 6, &val);
9297 put_smstate(u32, buf, 0x7fcc, (u32)val);
9298 kvm_get_dr(vcpu, 7, &val);
9299 put_smstate(u32, buf, 0x7fc8, (u32)val);
9300
9301 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9302 put_smstate(u32, buf, 0x7fc4, seg.selector);
9303 put_smstate(u32, buf, 0x7f64, seg.base);
9304 put_smstate(u32, buf, 0x7f60, seg.limit);
9305 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
9306
9307 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9308 put_smstate(u32, buf, 0x7fc0, seg.selector);
9309 put_smstate(u32, buf, 0x7f80, seg.base);
9310 put_smstate(u32, buf, 0x7f7c, seg.limit);
9311 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
9312
9313 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9314 put_smstate(u32, buf, 0x7f74, dt.address);
9315 put_smstate(u32, buf, 0x7f70, dt.size);
9316
9317 static_call(kvm_x86_get_idt)(vcpu, &dt);
9318 put_smstate(u32, buf, 0x7f58, dt.address);
9319 put_smstate(u32, buf, 0x7f54, dt.size);
9320
9321 for (i = 0; i < 6; i++)
9322 enter_smm_save_seg_32(vcpu, buf, i);
9323
9324 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
9325
9326
9327 put_smstate(u32, buf, 0x7efc, 0x00020000);
9328 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
9329}
9330
9331#ifdef CONFIG_X86_64
9332static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
9333{
9334 struct desc_ptr dt;
9335 struct kvm_segment seg;
9336 unsigned long val;
9337 int i;
9338
9339 for (i = 0; i < 16; i++)
9340 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
9341
9342 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
9343 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
9344
9345 kvm_get_dr(vcpu, 6, &val);
9346 put_smstate(u64, buf, 0x7f68, val);
9347 kvm_get_dr(vcpu, 7, &val);
9348 put_smstate(u64, buf, 0x7f60, val);
9349
9350 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
9351 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
9352 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
9353
9354 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
9355
9356
9357 put_smstate(u32, buf, 0x7efc, 0x00020064);
9358
9359 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
9360
9361 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9362 put_smstate(u16, buf, 0x7e90, seg.selector);
9363 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
9364 put_smstate(u32, buf, 0x7e94, seg.limit);
9365 put_smstate(u64, buf, 0x7e98, seg.base);
9366
9367 static_call(kvm_x86_get_idt)(vcpu, &dt);
9368 put_smstate(u32, buf, 0x7e84, dt.size);
9369 put_smstate(u64, buf, 0x7e88, dt.address);
9370
9371 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9372 put_smstate(u16, buf, 0x7e70, seg.selector);
9373 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
9374 put_smstate(u32, buf, 0x7e74, seg.limit);
9375 put_smstate(u64, buf, 0x7e78, seg.base);
9376
9377 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9378 put_smstate(u32, buf, 0x7e64, dt.size);
9379 put_smstate(u64, buf, 0x7e68, dt.address);
9380
9381 for (i = 0; i < 6; i++)
9382 enter_smm_save_seg_64(vcpu, buf, i);
9383}
9384#endif
9385
9386static void enter_smm(struct kvm_vcpu *vcpu)
9387{
9388 struct kvm_segment cs, ds;
9389 struct desc_ptr dt;
9390 unsigned long cr0;
9391 char buf[512];
9392
9393 memset(buf, 0, 512);
9394#ifdef CONFIG_X86_64
9395 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
9396 enter_smm_save_state_64(vcpu, buf);
9397 else
9398#endif
9399 enter_smm_save_state_32(vcpu, buf);
9400
9401
9402
9403
9404
9405
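	/*
	 * Give vendor code a chance to make its own ISA-specific changes to
	 * the vCPU state (e.g. leaving guest mode) now that the SMM state-save
	 * area has been filled in, but before the architectural SMM register
	 * state below is applied.
	 */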
9406 static_call(kvm_x86_enter_smm)(vcpu, buf);
9407
9408 kvm_smm_changed(vcpu, true);
9409 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
9410
9411 if (static_call(kvm_x86_get_nmi_mask)(vcpu))
9412 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
9413 else
9414 static_call(kvm_x86_set_nmi_mask)(vcpu, true);
9415
9416 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
9417 kvm_rip_write(vcpu, 0x8000);
9418
9419 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
9420 static_call(kvm_x86_set_cr0)(vcpu, cr0);
9421 vcpu->arch.cr0 = cr0;
9422
9423 static_call(kvm_x86_set_cr4)(vcpu, 0);
9424
9425
9426 dt.address = dt.size = 0;
9427 static_call(kvm_x86_set_idt)(vcpu, &dt);
9428
9429 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
9430
9431 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
9432 cs.base = vcpu->arch.smbase;
9433
9434 ds.selector = 0;
9435 ds.base = 0;
9436
9437 cs.limit = ds.limit = 0xffffffff;
9438 cs.type = ds.type = 0x3;
9439 cs.dpl = ds.dpl = 0;
9440 cs.db = ds.db = 0;
9441 cs.s = ds.s = 1;
9442 cs.l = ds.l = 0;
9443 cs.g = ds.g = 1;
9444 cs.avl = ds.avl = 0;
9445 cs.present = ds.present = 1;
9446 cs.unusable = ds.unusable = 0;
9447 cs.padding = ds.padding = 0;
9448
9449 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
9450 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
9451 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
9452 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
9453 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
9454 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
9455
9456#ifdef CONFIG_X86_64
9457 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
9458 static_call(kvm_x86_set_efer)(vcpu, 0);
9459#endif
9460
9461 kvm_update_cpuid_runtime(vcpu);
9462 kvm_mmu_reset_context(vcpu);
9463}
9464
9465static void process_smi(struct kvm_vcpu *vcpu)
9466{
9467 vcpu->arch.smi_pending = true;
9468 kvm_make_request(KVM_REQ_EVENT, vcpu);
9469}
9470
9471void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
9472 unsigned long *vcpu_bitmap)
9473{
9474 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, vcpu_bitmap);
9475}
9476
9477void kvm_make_scan_ioapic_request(struct kvm *kvm)
9478{
9479 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
9480}
9481
9482void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
9483{
9484 bool activate;
9485
9486 if (!lapic_in_kernel(vcpu))
9487 return;
9488
9489 down_read(&vcpu->kvm->arch.apicv_update_lock);
9490
9491 activate = kvm_apicv_activated(vcpu->kvm);
9492 if (vcpu->arch.apicv_active == activate)
9493 goto out;
9494
9495 vcpu->arch.apicv_active = activate;
9496 kvm_apic_update_apicv(vcpu);
9497 static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
9498
9499
9500
9501
9502
9503
9504
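	/*
	 * When APICv is being deactivated, interrupts may already have been
	 * accepted via APICv without KVM_REQ_EVENT being set.  Raise the
	 * request so inject_pending_event() re-evaluates pending interrupts
	 * through the legacy path.
	 */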
9505 if (!vcpu->arch.apicv_active)
9506 kvm_make_request(KVM_REQ_EVENT, vcpu);
9507
9508out:
9509 up_read(&vcpu->kvm->arch.apicv_update_lock);
9510}
9511EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
9512
9513void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
9514{
9515 unsigned long old, new;
9516
9517 lockdep_assert_held_write(&kvm->arch.apicv_update_lock);
9518
9519 if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
9520 !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
9521 return;
9522
9523 old = new = kvm->arch.apicv_inhibit_reasons;
9524
9525 if (activate)
9526 __clear_bit(bit, &new);
9527 else
9528 __set_bit(bit, &new);
9529
9530 if (!!old != !!new) {
9531 trace_kvm_apicv_update_request(activate, bit);
9532
9533
9534
9535
9536
9537
9538
9539
9540
9541
9542
9543
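		/*
		 * Kick all vCPUs out of the guest before updating
		 * apicv_inhibit_reasons; handling KVM_REQ_APICV_UPDATE takes
		 * apicv_update_lock for read, so no vCPU can re-enter the
		 * guest with a stale view of the inhibit state.
		 */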
9544 kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
9545 kvm->arch.apicv_inhibit_reasons = new;
9546 if (new) {
9547 unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
9548 kvm_zap_gfn_range(kvm, gfn, gfn+1);
9549 }
9550 } else
9551 kvm->arch.apicv_inhibit_reasons = new;
9552}
9553EXPORT_SYMBOL_GPL(__kvm_request_apicv_update);
9554
9555void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
9556{
9557 down_write(&kvm->arch.apicv_update_lock);
9558 __kvm_request_apicv_update(kvm, activate, bit);
9559 up_write(&kvm->arch.apicv_update_lock);
9560}
9561EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
9562
9563static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
9564{
9565 if (!kvm_apic_present(vcpu))
9566 return;
9567
9568 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
9569
9570 if (irqchip_split(vcpu->kvm))
9571 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
9572 else {
9573 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
9574 if (ioapic_in_kernel(vcpu->kvm))
9575 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
9576 }
9577
9578 if (is_guest_mode(vcpu))
9579 vcpu->arch.load_eoi_exitmap_pending = true;
9580 else
9581 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
9582}
9583
9584static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
9585{
9586 u64 eoi_exit_bitmap[4];
9587
9588 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
9589 return;
9590
9591 if (to_hv_vcpu(vcpu)) {
9592 bitmap_or((ulong *)eoi_exit_bitmap,
9593 vcpu->arch.ioapic_handled_vectors,
9594 to_hv_synic(vcpu)->vec_bitmap, 256);
9595 static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
9596 return;
9597 }
9598
9599 static_call(kvm_x86_load_eoi_exitmap)(
9600 vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
9601}
9602
9603void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
9604 unsigned long start, unsigned long end)
9605{
9606 unsigned long apic_address;
9607
9608
9609
9610
9611
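	/*
	 * The physical address of the APIC access page is cached by vendor
	 * code (e.g. in the VMCS); if the invalidated range covers it, ask
	 * all vCPUs to reload the page.
	 */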
9612 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
9613 if (start <= apic_address && apic_address < end)
9614 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
9615}
9616
9617void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
9618{
9619 if (!lapic_in_kernel(vcpu))
9620 return;
9621
9622 if (!kvm_x86_ops.set_apic_access_page_addr)
9623 return;
9624
9625 static_call(kvm_x86_set_apic_access_page_addr)(vcpu);
9626}
9627
9628void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
9629{
9630 smp_send_reschedule(vcpu->cpu);
9631}
9632EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
9633
9634
9635
9636
9637
9638
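/*
 * Returns 1 if the guest-execution loop in vcpu_run() should continue without
 * bouncing to userspace, 0 if KVM needs to exit to userspace, and a negative
 * value on error.
 */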
9639static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
9640{
9641 int r;
9642 bool req_int_win =
9643 dm_request_for_irq_injection(vcpu) &&
9644 kvm_cpu_accept_dm_intr(vcpu);
9645 fastpath_t exit_fastpath;
9646
9647 bool req_immediate_exit = false;
9648
9649
9650 if (unlikely(vcpu->kvm->dirty_ring_size &&
9651 kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
9652 vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
9653 trace_kvm_dirty_ring_exit(vcpu);
9654 r = 0;
9655 goto out;
9656 }
9657
9658 if (kvm_request_pending(vcpu)) {
9659 if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
9660 r = -EIO;
9661 goto out;
9662 }
9663 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
9664 if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
9665 r = 0;
9666 goto out;
9667 }
9668 }
9669 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
9670 kvm_mmu_unload(vcpu);
9671 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
9672 __kvm_migrate_timers(vcpu);
9673 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
9674 kvm_update_masterclock(vcpu->kvm);
9675 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
9676 kvm_gen_kvmclock_update(vcpu);
9677 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
9678 r = kvm_guest_time_update(vcpu);
9679 if (unlikely(r))
9680 goto out;
9681 }
9682 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
9683 kvm_mmu_sync_roots(vcpu);
9684 if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
9685 kvm_mmu_load_pgd(vcpu);
9686 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
9687 kvm_vcpu_flush_tlb_all(vcpu);
9688
9689
9690 kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
9691 }
9692 kvm_service_local_tlb_flush_requests(vcpu);
9693
9694 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
9695 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
9696 r = 0;
9697 goto out;
9698 }
9699 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
9700 if (is_guest_mode(vcpu)) {
9701 kvm_x86_ops.nested_ops->triple_fault(vcpu);
9702 } else {
9703 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
9704 vcpu->mmio_needed = 0;
9705 r = 0;
9706 goto out;
9707 }
9708 }
9709 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
9710
9711 vcpu->arch.apf.halted = true;
9712 r = 1;
9713 goto out;
9714 }
9715 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
9716 record_steal_time(vcpu);
9717 if (kvm_check_request(KVM_REQ_SMI, vcpu))
9718 process_smi(vcpu);
9719 if (kvm_check_request(KVM_REQ_NMI, vcpu))
9720 process_nmi(vcpu);
9721 if (kvm_check_request(KVM_REQ_PMU, vcpu))
9722 kvm_pmu_handle_event(vcpu);
9723 if (kvm_check_request(KVM_REQ_PMI, vcpu))
9724 kvm_pmu_deliver_pmi(vcpu);
9725 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
9726 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
9727 if (test_bit(vcpu->arch.pending_ioapic_eoi,
9728 vcpu->arch.ioapic_handled_vectors)) {
9729 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
9730 vcpu->run->eoi.vector =
9731 vcpu->arch.pending_ioapic_eoi;
9732 r = 0;
9733 goto out;
9734 }
9735 }
9736 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
9737 vcpu_scan_ioapic(vcpu);
9738 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
9739 vcpu_load_eoi_exitmap(vcpu);
9740 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
9741 kvm_vcpu_reload_apic_access_page(vcpu);
9742 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
9743 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
9744 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
9745 r = 0;
9746 goto out;
9747 }
9748 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
9749 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
9750 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
9751 r = 0;
9752 goto out;
9753 }
9754 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
9755 struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
9756
9757 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
9758 vcpu->run->hyperv = hv_vcpu->exit;
9759 r = 0;
9760 goto out;
9761 }
9762
9763
9764
9765
9766
9767
9768 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
9769 kvm_hv_process_stimers(vcpu);
9770 if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
9771 kvm_vcpu_update_apicv(vcpu);
9772 if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
9773 kvm_check_async_pf_completion(vcpu);
9774 if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
9775 static_call(kvm_x86_msr_filter_changed)(vcpu);
9776
9777 if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
9778 static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
9779 }
9780
9781 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
9782 kvm_xen_has_interrupt(vcpu)) {
9783 ++vcpu->stat.req_event;
9784 r = kvm_apic_accept_events(vcpu);
9785 if (r < 0) {
9786 r = 0;
9787 goto out;
9788 }
9789 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
9790 r = 1;
9791 goto out;
9792 }
9793
9794 r = inject_pending_event(vcpu, &req_immediate_exit);
9795 if (r < 0) {
9796 r = 0;
9797 goto out;
9798 }
9799 if (req_int_win)
9800 static_call(kvm_x86_enable_irq_window)(vcpu);
9801
9802 if (kvm_lapic_enabled(vcpu)) {
9803 update_cr8_intercept(vcpu);
9804 kvm_lapic_sync_to_vapic(vcpu);
9805 }
9806 }
9807
9808 r = kvm_mmu_reload(vcpu);
9809 if (unlikely(r)) {
9810 goto cancel_injection;
9811 }
9812
9813 preempt_disable();
9814
9815 static_call(kvm_x86_prepare_guest_switch)(vcpu);
9816
9817
9818
9819
9820
9821
9822 local_irq_disable();
9823 vcpu->mode = IN_GUEST_MODE;
9824
9825 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
9826
9827
9828
9829
9830
9831
9832
9833
9834
9835
9836
9837
9838
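	/*
	 * Ensure the write to vcpu->mode above is ordered before the request
	 * and posted-interrupt checks below.  This pairs with the barriers on
	 * the kick/posted-interrupt side (see kvm_vcpu_exiting_guest_mode())
	 * and also orders the mode change against page-table accesses made
	 * while the vCPU runs; srcu_read_unlock() alone is not guaranteed to
	 * provide a full barrier.
	 */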
9839 smp_mb__after_srcu_read_unlock();
9840
9841
9842
9843
9844
9845
9846
9847 if (kvm_lapic_enabled(vcpu))
9848 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
9849
9850 if (kvm_vcpu_exit_request(vcpu)) {
9851 vcpu->mode = OUTSIDE_GUEST_MODE;
9852 smp_wmb();
9853 local_irq_enable();
9854 preempt_enable();
9855 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9856 r = 1;
9857 goto cancel_injection;
9858 }
9859
9860 if (req_immediate_exit) {
9861 kvm_make_request(KVM_REQ_EVENT, vcpu);
9862 static_call(kvm_x86_request_immediate_exit)(vcpu);
9863 }
9864
9865 fpregs_assert_state_consistent();
9866 if (test_thread_flag(TIF_NEED_FPU_LOAD))
9867 switch_fpu_return();
9868
9869 if (unlikely(vcpu->arch.switch_db_regs)) {
9870 set_debugreg(0, 7);
9871 set_debugreg(vcpu->arch.eff_db[0], 0);
9872 set_debugreg(vcpu->arch.eff_db[1], 1);
9873 set_debugreg(vcpu->arch.eff_db[2], 2);
9874 set_debugreg(vcpu->arch.eff_db[3], 3);
9875 } else if (unlikely(hw_breakpoint_active())) {
9876 set_debugreg(0, 7);
9877 }
9878
9879 for (;;) {
9880
9881
9882
9883
9884
9885
9886 WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
9887
9888 exit_fastpath = static_call(kvm_x86_run)(vcpu);
9889 if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
9890 break;
9891
9892 if (kvm_lapic_enabled(vcpu))
9893 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
9894
9895 if (unlikely(kvm_vcpu_exit_request(vcpu))) {
9896 exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
9897 break;
9898 }
9899 }
9900
9901
9902
9903
9904
9905
9906
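	/*
	 * Sync dirty debug registers from hardware before the host values are
	 * restored and before the exit is handled, so that a debug-register
	 * access exit reads current values and can set KVM_DEBUGREG_WONT_EXIT
	 * again.
	 */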
9907 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
9908 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
9909 static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
9910 kvm_update_dr0123(vcpu);
9911 kvm_update_dr7(vcpu);
9912 }
9913
9914
9915
9916
9917
9918
9919
9920
9921 if (hw_breakpoint_active())
9922 hw_breakpoint_restore();
9923
9924 vcpu->arch.last_vmentry_cpu = vcpu->cpu;
9925 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
9926
9927 vcpu->mode = OUTSIDE_GUEST_MODE;
9928 smp_wmb();
9929
9930 static_call(kvm_x86_handle_exit_irqoff)(vcpu);
9931
9932
9933
9934
9935
9936
9937
9938
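	/*
	 * Briefly enable IRQs to consume any interrupt that is pending at
	 * this point, including the one that may have triggered the VM-Exit.
	 * The stat.exits increment between enable and disable also provides
	 * the instruction needed to fully reopen interrupts on CPUs that
	 * implement an interrupt shadow.  kvm_before/after_interrupt() let
	 * interrupt and NMI handlers see that they arrived from guest
	 * context.
	 */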
9939 kvm_before_interrupt(vcpu);
9940 local_irq_enable();
9941 ++vcpu->stat.exits;
9942 local_irq_disable();
9943 kvm_after_interrupt(vcpu);
9944
9945
9946
9947
9948
9949
9950
9951
9952 vtime_account_guest_exit();
9953
9954 if (lapic_in_kernel(vcpu)) {
9955 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
9956 if (delta != S64_MIN) {
9957 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
9958 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
9959 }
9960 }
9961
9962 local_irq_enable();
9963 preempt_enable();
9964
9965 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9966
9967
9968
9969
9970 if (unlikely(prof_on == KVM_PROFILING)) {
9971 unsigned long rip = kvm_rip_read(vcpu);
9972 profile_hit(KVM_PROFILING, (void *)rip);
9973 }
9974
9975 if (unlikely(vcpu->arch.tsc_always_catchup))
9976 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
9977
9978 if (vcpu->arch.apic_attention)
9979 kvm_lapic_sync_from_vapic(vcpu);
9980
9981 r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
9982 return r;
9983
9984cancel_injection:
9985 if (req_immediate_exit)
9986 kvm_make_request(KVM_REQ_EVENT, vcpu);
9987 static_call(kvm_x86_cancel_injection)(vcpu);
9988 if (unlikely(vcpu->arch.apic_attention))
9989 kvm_lapic_sync_from_vapic(vcpu);
9990out:
9991 return r;
9992}
9993
9994static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
9995{
9996 if (!kvm_arch_vcpu_runnable(vcpu) &&
9997 (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
9998 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9999 kvm_vcpu_block(vcpu);
10000 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
10001
10002 if (kvm_x86_ops.post_block)
10003 static_call(kvm_x86_post_block)(vcpu);
10004
10005 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
10006 return 1;
10007 }
10008
10009 if (kvm_apic_accept_events(vcpu) < 0)
10010 return 0;
10011 switch(vcpu->arch.mp_state) {
10012 case KVM_MP_STATE_HALTED:
10013 case KVM_MP_STATE_AP_RESET_HOLD:
10014 vcpu->arch.pv.pv_unhalted = false;
10015 vcpu->arch.mp_state =
10016 KVM_MP_STATE_RUNNABLE;
10017 fallthrough;
10018 case KVM_MP_STATE_RUNNABLE:
10019 vcpu->arch.apf.halted = false;
10020 break;
10021 case KVM_MP_STATE_INIT_RECEIVED:
10022 break;
10023 default:
10024 return -EINTR;
10025 }
10026 return 1;
10027}
10028
10029static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
10030{
10031 if (is_guest_mode(vcpu))
10032 kvm_check_nested_events(vcpu);
10033
10034 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
10035 !vcpu->arch.apf.halted);
10036}
10037
10038static int vcpu_run(struct kvm_vcpu *vcpu)
10039{
10040 int r;
10041 struct kvm *kvm = vcpu->kvm;
10042
10043 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
10044 vcpu->arch.l1tf_flush_l1d = true;
10045
10046 for (;;) {
10047 if (kvm_vcpu_running(vcpu)) {
10048 r = vcpu_enter_guest(vcpu);
10049 } else {
10050 r = vcpu_block(kvm, vcpu);
10051 }
10052
10053 if (r <= 0)
10054 break;
10055
10056 kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
10057 if (kvm_cpu_has_pending_timer(vcpu))
10058 kvm_inject_pending_timer_irqs(vcpu);
10059
10060 if (dm_request_for_irq_injection(vcpu) &&
10061 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
10062 r = 0;
10063 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
10064 ++vcpu->stat.request_irq_exits;
10065 break;
10066 }
10067
10068 if (__xfer_to_guest_mode_work_pending()) {
10069 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
10070 r = xfer_to_guest_mode_handle_work(vcpu);
10071 if (r)
10072 return r;
10073 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
10074 }
10075 }
10076
10077 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
10078
10079 return r;
10080}
10081
10082static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
10083{
10084 int r;
10085
10086 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
10087 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
10088 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
10089 return r;
10090}
10091
10092static int complete_emulated_pio(struct kvm_vcpu *vcpu)
10093{
10094 BUG_ON(!vcpu->arch.pio.count);
10095
10096 return complete_emulated_io(vcpu);
10097}
10098
10099
10100
10101
10102
10103
10104
10105
10106
10107
10108
10109
10110
10111
10112
10113
10114
10115
10116
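/*
 * Completes one step of an MMIO access that userspace is emulating.  Large
 * accesses are split into fragments, and each fragment is handled in pieces
 * of at most 8 bytes: for a read, copy the data userspace provided and
 * advance; for a write, hand the next piece to userspace.  Once all fragments
 * are done, reads resume instruction emulation with the fetched data and
 * writes return to the guest.
 */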
10117static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
10118{
10119 struct kvm_run *run = vcpu->run;
10120 struct kvm_mmio_fragment *frag;
10121 unsigned len;
10122
10123 BUG_ON(!vcpu->mmio_needed);
10124
10125
10126 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
10127 len = min(8u, frag->len);
10128 if (!vcpu->mmio_is_write)
10129 memcpy(frag->data, run->mmio.data, len);
10130
10131 if (frag->len <= 8) {
10132
10133 frag++;
10134 vcpu->mmio_cur_fragment++;
10135 } else {
10136
10137 frag->data += len;
10138 frag->gpa += len;
10139 frag->len -= len;
10140 }
10141
10142 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
10143 vcpu->mmio_needed = 0;
10144
10145
10146 if (vcpu->mmio_is_write)
10147 return 1;
10148 vcpu->mmio_read_completed = 1;
10149 return complete_emulated_io(vcpu);
10150 }
10151
10152 run->exit_reason = KVM_EXIT_MMIO;
10153 run->mmio.phys_addr = frag->gpa;
10154 if (vcpu->mmio_is_write)
10155 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
10156 run->mmio.len = min(8u, frag->len);
10157 run->mmio.is_write = vcpu->mmio_is_write;
10158 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
10159 return 0;
10160}
10161
10162
10163static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
10164{
10165
10166
10167
10168
10169 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
10170 trace_kvm_fpu(1);
10171}
10172
10173
10174static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
10175{
10176 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
10177 ++vcpu->stat.fpu_reload;
10178 trace_kvm_fpu(0);
10179}
10180
10181int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
10182{
10183 struct kvm_run *kvm_run = vcpu->run;
10184 int r;
10185
10186 vcpu_load(vcpu);
10187 kvm_sigset_activate(vcpu);
10188 kvm_run->flags = 0;
10189 kvm_load_guest_fpu(vcpu);
10190
10191 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
10192 if (kvm_run->immediate_exit) {
10193 r = -EINTR;
10194 goto out;
10195 }
10196 kvm_vcpu_block(vcpu);
10197 if (kvm_apic_accept_events(vcpu) < 0) {
10198 r = 0;
10199 goto out;
10200 }
10201 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
10202 r = -EAGAIN;
10203 if (signal_pending(current)) {
10204 r = -EINTR;
10205 kvm_run->exit_reason = KVM_EXIT_INTR;
10206 ++vcpu->stat.signal_exits;
10207 }
10208 goto out;
10209 }
10210
10211 if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
10212 (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
10213 r = -EINVAL;
10214 goto out;
10215 }
10216
10217 if (kvm_run->kvm_dirty_regs) {
10218 r = sync_regs(vcpu);
10219 if (r != 0)
10220 goto out;
10221 }
10222
10223
10224 if (!lapic_in_kernel(vcpu)) {
10225 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
10226 r = -EINVAL;
10227 goto out;
10228 }
10229 }
10230
10231 if (unlikely(vcpu->arch.complete_userspace_io)) {
10232 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
10233 vcpu->arch.complete_userspace_io = NULL;
10234 r = cui(vcpu);
10235 if (r <= 0)
10236 goto out;
10237 } else
10238 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
10239
10240 if (kvm_run->immediate_exit)
10241 r = -EINTR;
10242 else
10243 r = vcpu_run(vcpu);
10244
10245out:
10246 kvm_put_guest_fpu(vcpu);
10247 if (kvm_run->kvm_valid_regs)
10248 store_regs(vcpu);
10249 post_kvm_run_save(vcpu);
10250 kvm_sigset_deactivate(vcpu);
10251
10252 vcpu_put(vcpu);
10253 return r;
10254}
10255
10256static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10257{
10258 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
10259
10260
10261
10262
10263
10264
10265
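		/*
		 * Userspace is reading registers in the middle of instruction
		 * emulation, so flush the emulator's register cache back to
		 * the vCPU first.  Unusual, but some PV backdoor-style
		 * interfaces rely on it.
		 */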
10266 emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
10267 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10268 }
10269 regs->rax = kvm_rax_read(vcpu);
10270 regs->rbx = kvm_rbx_read(vcpu);
10271 regs->rcx = kvm_rcx_read(vcpu);
10272 regs->rdx = kvm_rdx_read(vcpu);
10273 regs->rsi = kvm_rsi_read(vcpu);
10274 regs->rdi = kvm_rdi_read(vcpu);
10275 regs->rsp = kvm_rsp_read(vcpu);
10276 regs->rbp = kvm_rbp_read(vcpu);
10277#ifdef CONFIG_X86_64
10278 regs->r8 = kvm_r8_read(vcpu);
10279 regs->r9 = kvm_r9_read(vcpu);
10280 regs->r10 = kvm_r10_read(vcpu);
10281 regs->r11 = kvm_r11_read(vcpu);
10282 regs->r12 = kvm_r12_read(vcpu);
10283 regs->r13 = kvm_r13_read(vcpu);
10284 regs->r14 = kvm_r14_read(vcpu);
10285 regs->r15 = kvm_r15_read(vcpu);
10286#endif
10287
10288 regs->rip = kvm_rip_read(vcpu);
10289 regs->rflags = kvm_get_rflags(vcpu);
10290}
10291
10292int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10293{
10294 vcpu_load(vcpu);
10295 __get_regs(vcpu, regs);
10296 vcpu_put(vcpu);
10297 return 0;
10298}
10299
10300static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10301{
10302 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
10303 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10304
10305 kvm_rax_write(vcpu, regs->rax);
10306 kvm_rbx_write(vcpu, regs->rbx);
10307 kvm_rcx_write(vcpu, regs->rcx);
10308 kvm_rdx_write(vcpu, regs->rdx);
10309 kvm_rsi_write(vcpu, regs->rsi);
10310 kvm_rdi_write(vcpu, regs->rdi);
10311 kvm_rsp_write(vcpu, regs->rsp);
10312 kvm_rbp_write(vcpu, regs->rbp);
10313#ifdef CONFIG_X86_64
10314 kvm_r8_write(vcpu, regs->r8);
10315 kvm_r9_write(vcpu, regs->r9);
10316 kvm_r10_write(vcpu, regs->r10);
10317 kvm_r11_write(vcpu, regs->r11);
10318 kvm_r12_write(vcpu, regs->r12);
10319 kvm_r13_write(vcpu, regs->r13);
10320 kvm_r14_write(vcpu, regs->r14);
10321 kvm_r15_write(vcpu, regs->r15);
10322#endif
10323
10324 kvm_rip_write(vcpu, regs->rip);
10325 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
10326
10327 vcpu->arch.exception.pending = false;
10328
10329 kvm_make_request(KVM_REQ_EVENT, vcpu);
10330}
10331
10332int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10333{
10334 vcpu_load(vcpu);
10335 __set_regs(vcpu, regs);
10336 vcpu_put(vcpu);
10337 return 0;
10338}
10339
10340void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
10341{
10342 struct kvm_segment cs;
10343
10344 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
10345 *db = cs.db;
10346 *l = cs.l;
10347}
10348EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
10349
10350static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10351{
10352 struct desc_ptr dt;
10353
10354 if (vcpu->arch.guest_state_protected)
10355 goto skip_protected_regs;
10356
10357 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
10358 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
10359 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
10360 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
10361 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
10362 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
10363
10364 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
10365 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
10366
10367 static_call(kvm_x86_get_idt)(vcpu, &dt);
10368 sregs->idt.limit = dt.size;
10369 sregs->idt.base = dt.address;
10370 static_call(kvm_x86_get_gdt)(vcpu, &dt);
10371 sregs->gdt.limit = dt.size;
10372 sregs->gdt.base = dt.address;
10373
10374 sregs->cr2 = vcpu->arch.cr2;
10375 sregs->cr3 = kvm_read_cr3(vcpu);
10376
10377skip_protected_regs:
10378 sregs->cr0 = kvm_read_cr0(vcpu);
10379 sregs->cr4 = kvm_read_cr4(vcpu);
10380 sregs->cr8 = kvm_get_cr8(vcpu);
10381 sregs->efer = vcpu->arch.efer;
10382 sregs->apic_base = kvm_get_apic_base(vcpu);
10383}
10384
10385static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10386{
10387 __get_sregs_common(vcpu, sregs);
10388
10389 if (vcpu->arch.guest_state_protected)
10390 return;
10391
10392 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
10393 set_bit(vcpu->arch.interrupt.nr,
10394 (unsigned long *)sregs->interrupt_bitmap);
10395}
10396
10397static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
10398{
10399 int i;
10400
10401 __get_sregs_common(vcpu, (struct kvm_sregs *)sregs2);
10402
10403 if (vcpu->arch.guest_state_protected)
10404 return;
10405
10406 if (is_pae_paging(vcpu)) {
10407 for (i = 0 ; i < 4 ; i++)
10408 sregs2->pdptrs[i] = kvm_pdptr_read(vcpu, i);
10409 sregs2->flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
10410 }
10411}
10412
10413int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
10414 struct kvm_sregs *sregs)
10415{
10416 vcpu_load(vcpu);
10417 __get_sregs(vcpu, sregs);
10418 vcpu_put(vcpu);
10419 return 0;
10420}
10421
10422int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
10423 struct kvm_mp_state *mp_state)
10424{
10425 int r;
10426
10427 vcpu_load(vcpu);
10428 if (kvm_mpx_supported())
10429 kvm_load_guest_fpu(vcpu);
10430
10431 r = kvm_apic_accept_events(vcpu);
10432 if (r < 0)
10433 goto out;
10434 r = 0;
10435
10436 if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
10437 vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
10438 vcpu->arch.pv.pv_unhalted)
10439 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
10440 else
10441 mp_state->mp_state = vcpu->arch.mp_state;
10442
10443out:
10444 if (kvm_mpx_supported())
10445 kvm_put_guest_fpu(vcpu);
10446 vcpu_put(vcpu);
10447 return r;
10448}
10449
10450int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
10451 struct kvm_mp_state *mp_state)
10452{
10453 int ret = -EINVAL;
10454
10455 vcpu_load(vcpu);
10456
10457 if (!lapic_in_kernel(vcpu) &&
10458 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
10459 goto out;
10460
10461
10462
10463
10464
10465
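	/*
	 * Refuse to put the vCPU into INIT_RECEIVED or SIPI_RECEIVED while
	 * INIT is latched (e.g. the vCPU is in SMM) or an SMI is pending;
	 * doing so would bypass the architectural blocking of INIT/SIPI.
	 */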
10466 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
10467 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
10468 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
10469 goto out;
10470
10471 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
10472 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
10473 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
10474 } else
10475 vcpu->arch.mp_state = mp_state->mp_state;
10476 kvm_make_request(KVM_REQ_EVENT, vcpu);
10477
10478 ret = 0;
10479out:
10480 vcpu_put(vcpu);
10481 return ret;
10482}
10483
10484int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
10485 int reason, bool has_error_code, u32 error_code)
10486{
10487 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
10488 int ret;
10489
10490 init_emulate_ctxt(vcpu);
10491
10492 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
10493 has_error_code, error_code);
10494 if (ret) {
10495 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
10496 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
10497 vcpu->run->internal.ndata = 0;
10498 return 0;
10499 }
10500
10501 kvm_rip_write(vcpu, ctxt->eip);
10502 kvm_set_rflags(vcpu, ctxt->eflags);
10503 return 1;
10504}
10505EXPORT_SYMBOL_GPL(kvm_task_switch);
10506
10507static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10508{
10509 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
10510
10511
10512
10513
10514
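		/*
		 * EFER.LME together with CR0.PG means the vCPU is (about to
		 * be) in long mode, which additionally requires CR4.PAE and
		 * EFER.LMA, and a CR3 within the guest's physical address
		 * width.
		 */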
10515 if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
10516 return false;
10517 if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
10518 return false;
10519 } else {
10520
10521
10522
10523
10524 if (sregs->efer & EFER_LMA || sregs->cs.l)
10525 return false;
10526 }
10527
10528 return kvm_is_valid_cr4(vcpu, sregs->cr4);
10529}
10530
10531static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
10532 int *mmu_reset_needed, bool update_pdptrs)
10533{
10534 struct msr_data apic_base_msr;
10535 int idx;
10536 struct desc_ptr dt;
10537
10538 if (!kvm_is_valid_sregs(vcpu, sregs))
10539 return -EINVAL;
10540
10541 apic_base_msr.data = sregs->apic_base;
10542 apic_base_msr.host_initiated = true;
10543 if (kvm_set_apic_base(vcpu, &apic_base_msr))
10544 return -EINVAL;
10545
10546 if (vcpu->arch.guest_state_protected)
10547 return 0;
10548
10549 dt.size = sregs->idt.limit;
10550 dt.address = sregs->idt.base;
10551 static_call(kvm_x86_set_idt)(vcpu, &dt);
10552 dt.size = sregs->gdt.limit;
10553 dt.address = sregs->gdt.base;
10554 static_call(kvm_x86_set_gdt)(vcpu, &dt);
10555
10556 vcpu->arch.cr2 = sregs->cr2;
10557 *mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
10558 vcpu->arch.cr3 = sregs->cr3;
10559 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
10560
10561 kvm_set_cr8(vcpu, sregs->cr8);
10562
10563 *mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
10564 static_call(kvm_x86_set_efer)(vcpu, sregs->efer);
10565
10566 *mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
10567 static_call(kvm_x86_set_cr0)(vcpu, sregs->cr0);
10568 vcpu->arch.cr0 = sregs->cr0;
10569
10570 *mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
10571 static_call(kvm_x86_set_cr4)(vcpu, sregs->cr4);
10572
10573 if (update_pdptrs) {
10574 idx = srcu_read_lock(&vcpu->kvm->srcu);
10575 if (is_pae_paging(vcpu)) {
10576 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
10577 *mmu_reset_needed = 1;
10578 }
10579 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10580 }
10581
10582 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
10583 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
10584 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
10585 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
10586 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
10587 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
10588
10589 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
10590 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
10591
10592 update_cr8_intercept(vcpu);
10593
10594
10595 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
10596 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
10597 !is_protmode(vcpu))
10598 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10599
10600 return 0;
10601}
10602
10603static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10604{
10605 int pending_vec, max_bits;
10606 int mmu_reset_needed = 0;
10607 int ret = __set_sregs_common(vcpu, sregs, &mmu_reset_needed, true);
10608
10609 if (ret)
10610 return ret;
10611
10612 if (mmu_reset_needed)
10613 kvm_mmu_reset_context(vcpu);
10614
10615 max_bits = KVM_NR_INTERRUPTS;
10616 pending_vec = find_first_bit(
10617 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
10618
10619 if (pending_vec < max_bits) {
10620 kvm_queue_interrupt(vcpu, pending_vec, false);
10621 pr_debug("Set back pending irq %d\n", pending_vec);
10622 kvm_make_request(KVM_REQ_EVENT, vcpu);
10623 }
10624 return 0;
10625}
10626
10627static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
10628{
10629 int mmu_reset_needed = 0;
10630 bool valid_pdptrs = sregs2->flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
10631 bool pae = (sregs2->cr0 & X86_CR0_PG) && (sregs2->cr4 & X86_CR4_PAE) &&
10632 !(sregs2->efer & EFER_LMA);
10633 int i, ret;
10634
10635 if (sregs2->flags & ~KVM_SREGS2_FLAGS_PDPTRS_VALID)
10636 return -EINVAL;
10637
10638 if (valid_pdptrs && (!pae || vcpu->arch.guest_state_protected))
10639 return -EINVAL;
10640
10641 ret = __set_sregs_common(vcpu, (struct kvm_sregs *)sregs2,
10642 &mmu_reset_needed, !valid_pdptrs);
10643 if (ret)
10644 return ret;
10645
10646 if (valid_pdptrs) {
10647 for (i = 0; i < 4 ; i++)
10648 kvm_pdptr_write(vcpu, i, sregs2->pdptrs[i]);
10649
10650 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
10651 mmu_reset_needed = 1;
10652 vcpu->arch.pdptrs_from_userspace = true;
10653 }
10654 if (mmu_reset_needed)
10655 kvm_mmu_reset_context(vcpu);
10656 return 0;
10657}
10658
10659int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
10660 struct kvm_sregs *sregs)
10661{
10662 int ret;
10663
10664 vcpu_load(vcpu);
10665 ret = __set_sregs(vcpu, sregs);
10666 vcpu_put(vcpu);
10667 return ret;
10668}
10669
10670static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
10671{
10672 bool inhibit = false;
10673 struct kvm_vcpu *vcpu;
10674 int i;
10675
10676 down_write(&kvm->arch.apicv_update_lock);
10677
10678 kvm_for_each_vcpu(i, vcpu, kvm) {
10679 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
10680 inhibit = true;
10681 break;
10682 }
10683 }
10684 __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
10685 up_write(&kvm->arch.apicv_update_lock);
10686}
10687
10688int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
10689 struct kvm_guest_debug *dbg)
10690{
10691 unsigned long rflags;
10692 int i, r;
10693
10694 if (vcpu->arch.guest_state_protected)
10695 return -EINVAL;
10696
10697 vcpu_load(vcpu);
10698
10699 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
10700 r = -EBUSY;
10701 if (vcpu->arch.exception.pending)
10702 goto out;
10703 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
10704 kvm_queue_exception(vcpu, DB_VECTOR);
10705 else
10706 kvm_queue_exception(vcpu, BP_VECTOR);
10707 }
10708
10709
10710
10711
10712
10713 rflags = kvm_get_rflags(vcpu);
10714
10715 vcpu->guest_debug = dbg->control;
10716 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
10717 vcpu->guest_debug = 0;
10718
10719 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
10720 for (i = 0; i < KVM_NR_DB_REGS; ++i)
10721 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
10722 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
10723 } else {
10724 for (i = 0; i < KVM_NR_DB_REGS; i++)
10725 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
10726 }
10727 kvm_update_dr7(vcpu);
10728
10729 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10730 vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
10731
10732
10733
10734
10735
10736 kvm_set_rflags(vcpu, rflags);
10737
10738 static_call(kvm_x86_update_exception_bitmap)(vcpu);
10739
10740 kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
10741
10742 r = 0;
10743
10744out:
10745 vcpu_put(vcpu);
10746 return r;
10747}
10748
10749
10750
10751
10752int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
10753 struct kvm_translation *tr)
10754{
10755 unsigned long vaddr = tr->linear_address;
10756 gpa_t gpa;
10757 int idx;
10758
10759 vcpu_load(vcpu);
10760
10761 idx = srcu_read_lock(&vcpu->kvm->srcu);
10762 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
10763 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10764 tr->physical_address = gpa;
10765 tr->valid = gpa != UNMAPPED_GVA;
10766 tr->writeable = 1;
10767 tr->usermode = 0;
10768
10769 vcpu_put(vcpu);
10770 return 0;
10771}
10772
10773int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
10774{
10775 struct fxregs_state *fxsave;
10776
10777 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
10778 return 0;
10779
10780 vcpu_load(vcpu);
10781
10782 fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
10783 memcpy(fpu->fpr, fxsave->st_space, 128);
10784 fpu->fcw = fxsave->cwd;
10785 fpu->fsw = fxsave->swd;
10786 fpu->ftwx = fxsave->twd;
10787 fpu->last_opcode = fxsave->fop;
10788 fpu->last_ip = fxsave->rip;
10789 fpu->last_dp = fxsave->rdp;
10790 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
10791
10792 vcpu_put(vcpu);
10793 return 0;
10794}
10795
10796int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
10797{
10798 struct fxregs_state *fxsave;
10799
10800 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
10801 return 0;
10802
10803 vcpu_load(vcpu);
10804
10805 fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
10806
10807 memcpy(fxsave->st_space, fpu->fpr, 128);
10808 fxsave->cwd = fpu->fcw;
10809 fxsave->swd = fpu->fsw;
10810 fxsave->twd = fpu->ftwx;
10811 fxsave->fop = fpu->last_opcode;
10812 fxsave->rip = fpu->last_ip;
10813 fxsave->rdp = fpu->last_dp;
10814 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
10815
10816 vcpu_put(vcpu);
10817 return 0;
10818}
10819
10820static void store_regs(struct kvm_vcpu *vcpu)
10821{
10822 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
10823
10824 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
10825 __get_regs(vcpu, &vcpu->run->s.regs.regs);
10826
10827 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
10828 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
10829
10830 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
10831 kvm_vcpu_ioctl_x86_get_vcpu_events(
10832 vcpu, &vcpu->run->s.regs.events);
10833}
10834
10835static int sync_regs(struct kvm_vcpu *vcpu)
10836{
10837 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
10838 __set_regs(vcpu, &vcpu->run->s.regs.regs);
10839 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
10840 }
10841 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
10842 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
10843 return -EINVAL;
10844 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
10845 }
10846 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
10847 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
10848 vcpu, &vcpu->run->s.regs.events))
10849 return -EINVAL;
10850 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
10851 }
10852
10853 return 0;
10854}
10855
10856int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
10857{
10858 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
10859 pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
10860 "guest TSC will not be reliable\n");
10861
10862 return 0;
10863}
10864
10865int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
10866{
10867 struct page *page;
10868 int r;
10869
10870 vcpu->arch.last_vmentry_cpu = -1;
10871 vcpu->arch.regs_avail = ~0;
10872 vcpu->arch.regs_dirty = ~0;
10873
10874 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
10875 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10876 else
10877 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
10878
10879 r = kvm_mmu_create(vcpu);
10880 if (r < 0)
10881 return r;
10882
10883 if (irqchip_in_kernel(vcpu->kvm)) {
10884 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
10885 if (r < 0)
10886 goto fail_mmu_destroy;
10887 if (kvm_apicv_activated(vcpu->kvm))
10888 vcpu->arch.apicv_active = true;
10889 } else
10890 static_branch_inc(&kvm_has_noapic_vcpu);
10891
10892 r = -ENOMEM;
10893
10894 page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
10895 if (!page)
10896 goto fail_free_lapic;
10897 vcpu->arch.pio_data = page_address(page);
10898
10899 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
10900 GFP_KERNEL_ACCOUNT);
10901 if (!vcpu->arch.mce_banks)
10902 goto fail_free_pio_data;
10903 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
10904
10905 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
10906 GFP_KERNEL_ACCOUNT))
10907 goto fail_free_mce_banks;
10908
10909 if (!alloc_emulate_ctxt(vcpu))
10910 goto free_wbinvd_dirty_mask;
10911
10912 if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
10913 pr_err("kvm: failed to allocate vcpu's fpu\n");
10914 goto free_emulate_ctxt;
10915 }
10916
10917 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
10918 vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
10919
10920 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
10921
10922 kvm_async_pf_hash_reset(vcpu);
10923 kvm_pmu_init(vcpu);
10924
10925 vcpu->arch.pending_external_vector = -1;
10926 vcpu->arch.preempted_in_kernel = false;
10927
10928#if IS_ENABLED(CONFIG_HYPERV)
10929 vcpu->arch.hv_root_tdp = INVALID_PAGE;
10930#endif
10931
10932 r = static_call(kvm_x86_vcpu_create)(vcpu);
10933 if (r)
10934 goto free_guest_fpu;
10935
10936 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
10937 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
10938 kvm_vcpu_mtrr_init(vcpu);
10939 vcpu_load(vcpu);
10940 kvm_set_tsc_khz(vcpu, max_tsc_khz);
10941 kvm_vcpu_reset(vcpu, false);
10942 kvm_init_mmu(vcpu);
10943 vcpu_put(vcpu);
10944 return 0;
10945
10946free_guest_fpu:
10947 fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
10948free_emulate_ctxt:
10949 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
10950free_wbinvd_dirty_mask:
10951 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
10952fail_free_mce_banks:
10953 kfree(vcpu->arch.mce_banks);
10954fail_free_pio_data:
10955 free_page((unsigned long)vcpu->arch.pio_data);
10956fail_free_lapic:
10957 kvm_free_lapic(vcpu);
10958fail_mmu_destroy:
10959 kvm_mmu_destroy(vcpu);
10960 return r;
10961}
10962
10963void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
10964{
10965 struct kvm *kvm = vcpu->kvm;
10966
10967 if (mutex_lock_killable(&vcpu->mutex))
10968 return;
10969 vcpu_load(vcpu);
10970 kvm_synchronize_tsc(vcpu, 0);
10971 vcpu_put(vcpu);
10972
10973
10974 vcpu->arch.msr_kvm_poll_control = 1;
10975
10976 mutex_unlock(&vcpu->mutex);
10977
10978 if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
10979 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
10980 KVMCLOCK_SYNC_PERIOD);
10981}
10982
10983void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
10984{
10985 int idx;
10986
10987 kvmclock_reset(vcpu);
10988
10989 static_call(kvm_x86_vcpu_free)(vcpu);
10990
10991 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
10992 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
10993 fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
10994
10995 kvm_hv_vcpu_uninit(vcpu);
10996 kvm_pmu_destroy(vcpu);
10997 kfree(vcpu->arch.mce_banks);
10998 kvm_free_lapic(vcpu);
10999 idx = srcu_read_lock(&vcpu->kvm->srcu);
11000 kvm_mmu_destroy(vcpu);
11001 srcu_read_unlock(&vcpu->kvm->srcu, idx);
11002 free_page((unsigned long)vcpu->arch.pio_data);
11003 kvfree(vcpu->arch.cpuid_entries);
11004 if (!lapic_in_kernel(vcpu))
11005 static_branch_dec(&kvm_has_noapic_vcpu);
11006}
11007
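/*
 * Emulate a vCPU RESET or INIT.  @init_event distinguishes INIT, which
 * preserves state such as CR0.CD/CR0.NW, SMBASE, XCR0 and the PMU, from a
 * full RESET, which reinitializes those values as well.
 */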
11008void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
11009{
11010 struct kvm_cpuid_entry2 *cpuid_0x1;
11011 unsigned long old_cr0 = kvm_read_cr0(vcpu);
11012 unsigned long new_cr0;
11013
	/*
	 * Guest CR0, CR3 and CR4 are expected to be zero on the first, i.e.
	 * non-INIT, reset of a vCPU: the "set" flows invoked below read other
	 * registers to handle side effects, so warn if state that should have
	 * been zeroed at vCPU creation is already dirty.
	 */
11021 WARN_ON_ONCE(!init_event &&
11022 (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
11023
11024 kvm_lapic_reset(vcpu, init_event);
11025
11026 vcpu->arch.hflags = 0;
11027
11028 vcpu->arch.smi_pending = 0;
11029 vcpu->arch.smi_count = 0;
11030 atomic_set(&vcpu->arch.nmi_queued, 0);
11031 vcpu->arch.nmi_pending = 0;
11032 vcpu->arch.nmi_injected = false;
11033 kvm_clear_interrupt_queue(vcpu);
11034 kvm_clear_exception_queue(vcpu);
11035
11036 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
11037 kvm_update_dr0123(vcpu);
11038 vcpu->arch.dr6 = DR6_ACTIVE_LOW;
11039 vcpu->arch.dr7 = DR7_FIXED_1;
11040 kvm_update_dr7(vcpu);
11041
11042 vcpu->arch.cr2 = 0;
11043
11044 kvm_make_request(KVM_REQ_EVENT, vcpu);
11045 vcpu->arch.apf.msr_en_val = 0;
11046 vcpu->arch.apf.msr_int_val = 0;
11047 vcpu->arch.st.msr_val = 0;
11048
11049 kvmclock_reset(vcpu);
11050
11051 kvm_clear_async_pf_completion_queue(vcpu);
11052 kvm_async_pf_hash_reset(vcpu);
11053 vcpu->arch.apf.halted = false;
11054
11055 if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
11056 struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
11057
		/*
		 * On INIT the guest's FPU state may be resident in hardware;
		 * swap it out so the MPX components are cleared in the
		 * in-memory fpstate, then reload it afterwards.
		 */
11062 if (init_event)
11063 kvm_put_guest_fpu(vcpu);
11064
11065 fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
11066 fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
11067
11068 if (init_event)
11069 kvm_load_guest_fpu(vcpu);
11070 }
11071
11072 if (!init_event) {
11073 kvm_pmu_reset(vcpu);
11074 vcpu->arch.smbase = 0x30000;
11075
11076 vcpu->arch.msr_misc_features_enables = 0;
11077
11078 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
11079 }
11080
11081
11082 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
11083 kvm_register_mark_dirty(vcpu, VCPU_REGS_RSP);
11084
	/*
	 * RDX holds the CPU's Family/Model/Stepping at RESET/INIT.  Use the
	 * guest's CPUID.0x1.EAX when available, otherwise fall back to KVM's
	 * historical default of 0x600 (P6 family).
	 */
11092 cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
11093 kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
11094
11095 vcpu->arch.ia32_xss = 0;
11096
11097 static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
11098
11099 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
11100 kvm_rip_write(vcpu, 0xfff0);
11101
11102 vcpu->arch.cr3 = 0;
11103 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
11104
	/*
	 * CR0.ET is always reported as set.  CR0.CD and CR0.NW are set on
	 * RESET but preserved from the old CR0 on INIT.
	 */
11110 new_cr0 = X86_CR0_ET;
11111 if (init_event)
11112 new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD));
11113 else
11114 new_cr0 |= X86_CR0_NW | X86_CR0_CD;
11115
11116 static_call(kvm_x86_set_cr0)(vcpu, new_cr0);
11117 static_call(kvm_x86_set_cr4)(vcpu, 0);
11118 static_call(kvm_x86_set_efer)(vcpu, 0);
11119 static_call(kvm_x86_update_exception_bitmap)(vcpu);
11120
	/*
	 * If paging was enabled prior to this reset (only possible on INIT,
	 * as CR0 is zero prior to RESET), reset the MMU context so the
	 * now-disabled paging state takes effect.
	 */
11129 if (old_cr0 & X86_CR0_PG)
11130 kvm_mmu_reset_context(vcpu);
11131
	/*
	 * Flush the guest's TLB on INIT so that stale translations from
	 * before the INIT are purged; a full RESET starts from a clean vCPU
	 * and needs no flush.
	 */
11141 if (init_event)
11142 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
11143}
11144EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
11145
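/*
 * Deliver a SIPI: point CS at the 4KiB-aligned page selected by @vector
 * (selector = vector << 8, base = vector << 12) and start execution at
 * offset 0 within that page.
 */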
11146void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
11147{
11148 struct kvm_segment cs;
11149
11150 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
11151 cs.selector = vector << 8;
11152 cs.base = vector << 12;
11153 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
11154 kvm_rip_write(vcpu, 0);
11155}
11156EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
11157
11158int kvm_arch_hardware_enable(void)
11159{
11160 struct kvm *kvm;
11161 struct kvm_vcpu *vcpu;
11162 int i;
11163 int ret;
11164 u64 local_tsc;
11165 u64 max_tsc = 0;
11166 bool stable, backwards_tsc = false;
11167
11168 kvm_user_return_msr_cpu_online();
11169 ret = static_call(kvm_x86_hardware_enable)();
11170 if (ret != 0)
11171 return ret;
11172
11173 local_tsc = rdtsc();
11174 stable = !kvm_check_tsc_unstable();
11175 list_for_each_entry(kvm, &vm_list, vm_list) {
11176 kvm_for_each_vcpu(i, vcpu, kvm) {
11177 if (!stable && vcpu->cpu == smp_processor_id())
11178 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
11179 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
11180 backwards_tsc = true;
11181 if (vcpu->arch.last_host_tsc > max_tsc)
11182 max_tsc = vcpu->arch.last_host_tsc;
11183 }
11184 }
11185 }
11186
	/*
	 * Some hosts lose the TSC across suspend/resume, which makes the TSC
	 * appear to jump backwards when hardware virtualization is
	 * re-enabled.  The loop above records whether any vCPU observed a
	 * host TSC larger than the current one and, if so, the largest such
	 * value.  The block below compensates by adding the observed delta to
	 * every vCPU's TSC offset so that no guest ever sees its TSC move
	 * backwards, and by forcing a masterclock update since the adjusted
	 * offsets were not all written at the same host time.
	 */
11225 if (backwards_tsc) {
11226 u64 delta_cyc = max_tsc - local_tsc;
11227 list_for_each_entry(kvm, &vm_list, vm_list) {
11228 kvm->arch.backwards_tsc_observed = true;
11229 kvm_for_each_vcpu(i, vcpu, kvm) {
11230 vcpu->arch.tsc_offset_adjustment += delta_cyc;
11231 vcpu->arch.last_host_tsc = local_tsc;
11232 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
11233 }
			/*
			 * Also clear the VM's TSC-write tracking so that the
			 * next guest TSC write isn't mistakenly matched
			 * against pre-adjustment values.
			 */
11241 kvm->arch.last_tsc_nsec = 0;
11242 kvm->arch.last_tsc_write = 0;
11243 }
11244
11245 }
11246 return 0;
11247}
11248
11249void kvm_arch_hardware_disable(void)
11250{
11251 static_call(kvm_x86_hardware_disable)();
11252 drop_user_return_notifiers();
11253}
11254
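/*
 * One-time hardware setup: snapshot host EFER and XSS, run the vendor's
 * hardware_setup(), install the vendor's runtime ops behind the static
 * calls, recompute the CR4 reserved bits from the resulting capabilities,
 * derive the TSC-scaling limits and build the list of MSRs to save/restore.
 */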
11255int kvm_arch_hardware_setup(void *opaque)
11256{
11257 struct kvm_x86_init_ops *ops = opaque;
11258 int r;
11259
11260 rdmsrl_safe(MSR_EFER, &host_efer);
11261
11262 if (boot_cpu_has(X86_FEATURE_XSAVES))
11263 rdmsrl(MSR_IA32_XSS, host_xss);
11264
11265 r = ops->hardware_setup();
11266 if (r != 0)
11267 return r;
11268
11269 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
11270 kvm_ops_static_call_update();
11271
11272 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
11273 supported_xss = 0;
11274
11275#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
11276 cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
11277#undef __kvm_cpu_cap_has
11278
11279 if (kvm_has_tsc_control) {
		/*
		 * Cap the maximum guest TSC frequency at the largest value
		 * that both fits in a signed 32-bit integer and is reachable
		 * by scaling the host TSC with the maximum scaling ratio.
		 */
11286 u64 max = min(0x7fffffffULL,
11287 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
11288 kvm_max_guest_tsc_khz = max;
11289
11290 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
11291 }
11292
11293 kvm_init_msr_list();
11294 return 0;
11295}
11296
11297void kvm_arch_hardware_unsetup(void)
11298{
11299 static_call(kvm_x86_hardware_unsetup)();
11300}
11301
11302int kvm_arch_check_processor_compat(void *opaque)
11303{
11304 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
11305 struct kvm_x86_init_ops *ops = opaque;
11306
11307 WARN_ON(!irqs_disabled());
11308
11309 if (__cr4_reserved_bits(cpu_has, c) !=
11310 __cr4_reserved_bits(cpu_has, &boot_cpu_data))
11311 return -EIO;
11312
11313 return ops->check_processor_compatibility();
11314}
11315
11316bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
11317{
11318 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
11319}
11320EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
11321
11322bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
11323{
11324 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
11325}
11326
11327__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
11328EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
11329
11330void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
11331{
11332 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
11333
11334 vcpu->arch.l1tf_flush_l1d = true;
11335 if (pmu->version && unlikely(pmu->event_count)) {
11336 pmu->need_cleanup = true;
11337 kvm_make_request(KVM_REQ_PMU, vcpu);
11338 }
11339 static_call(kvm_x86_sched_in)(vcpu, cpu);
11340}
11341
11342void kvm_arch_free_vm(struct kvm *kvm)
11343{
11344 kfree(to_kvm_hv(kvm)->hv_pa_pg);
11345 __kvm_arch_free_vm(kvm);
11346}
11347
11348
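/*
 * Initialize the arch-specific portion of a new VM: page-track
 * infrastructure, MMU and IRQ bookkeeping, the per-VM clock state (kvmclock
 * offset and pvclock copy), Hyper-V and Xen state, and finally the vendor's
 * own vm_init().
 */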
11349int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
11350{
11351 int ret;
11352 unsigned long flags;
11353
11354 if (type)
11355 return -EINVAL;
11356
11357 ret = kvm_page_track_init(kvm);
11358 if (ret)
11359 return ret;
11360
11361 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
11362 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
11363 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
11364 INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
11365 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
11366 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
11367
	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
11369 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
11371 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
11372 &kvm->arch.irq_sources_bitmap);
11373
11374 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
11375 mutex_init(&kvm->arch.apic_map_lock);
11376 seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock);
11377 kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
11378
11379 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
11380 pvclock_update_vm_gtod_copy(kvm);
11381 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
11382
11383 kvm->arch.guest_can_read_msr_platform_info = true;
11384
11385#if IS_ENABLED(CONFIG_HYPERV)
11386 spin_lock_init(&kvm->arch.hv_root_tdp_lock);
11387 kvm->arch.hv_root_tdp = INVALID_PAGE;
11388#endif
11389
11390 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
11391 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
11392
11393 kvm_apicv_init(kvm);
11394 kvm_hv_init_vm(kvm);
11395 kvm_mmu_init_vm(kvm);
11396 kvm_xen_init_vm(kvm);
11397
11398 return static_call(kvm_x86_vm_init)(kvm);
11399}
11400
11401int kvm_arch_post_init_vm(struct kvm *kvm)
11402{
11403 return kvm_mmu_post_init_vm(kvm);
11404}
11405
11406static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
11407{
11408 vcpu_load(vcpu);
11409 kvm_mmu_unload(vcpu);
11410 vcpu_put(vcpu);
11411}
11412
11413static void kvm_free_vcpus(struct kvm *kvm)
11414{
11415 unsigned int i;
11416 struct kvm_vcpu *vcpu;
11417
	/*
	 * Flush pending async page faults and unload each vCPU's MMU first,
	 * so that no MMU pages remain in use when the vCPUs are destroyed.
	 */
11421 kvm_for_each_vcpu(i, vcpu, kvm) {
11422 kvm_clear_async_pf_completion_queue(vcpu);
11423 kvm_unload_vcpu_mmu(vcpu);
11424 }
11425 kvm_for_each_vcpu(i, vcpu, kvm)
11426 kvm_vcpu_destroy(vcpu);
11427
11428 mutex_lock(&kvm->lock);
11429 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
11430 kvm->vcpus[i] = NULL;
11431
11432 atomic_set(&kvm->online_vcpus, 0);
11433 mutex_unlock(&kvm->lock);
11434}
11435
11436void kvm_arch_sync_events(struct kvm *kvm)
11437{
11438 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
11439 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
11440 kvm_free_pit(kvm);
11441}
11442
11443#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
11444
/*
 * __x86_set_memory_region: set up or tear down a KVM-internal memory slot.
 *
 * @kvm: the VM.
 * @id: the id of the internal memslot.
 * @gpa: the guest physical address the slot is mapped at.
 * @size: the size of the slot in bytes; zero tears the slot down.
 *
 * For a non-zero @size, anonymous memory is mmap()ed on behalf of the VM and
 * registered in every address space; for a zero @size the existing mapping is
 * unregistered and unmapped.  Returns the userspace address of the mapping on
 * success, an ERR_PTR_USR()-encoded error on failure, or NULL when asked to
 * delete a slot that doesn't exist.  Callers are expected to hold
 * kvm->slots_lock, as the teardown path in kvm_arch_destroy_vm() does.
 */
11467void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
11468 u32 size)
11469{
11470 int i, r;
11471 unsigned long hva, old_npages;
11472 struct kvm_memslots *slots = kvm_memslots(kvm);
11473 struct kvm_memory_slot *slot;
11474
11475
11476 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
11477 return ERR_PTR_USR(-EINVAL);
11478
11479 slot = id_to_memslot(slots, id);
11480 if (size) {
11481 if (slot && slot->npages)
11482 return ERR_PTR_USR(-EEXIST);
		/*
		 * Use MAP_SHARED so the internal slot's pages can't be moved
		 * out from under KVM by fork()/COW.
		 */
11488 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
11489 MAP_SHARED | MAP_ANONYMOUS, 0);
11490 if (IS_ERR((void *)hva))
11491 return (void __user *)hva;
11492 } else {
11493 if (!slot || !slot->npages)
11494 return NULL;
11495
11496 old_npages = slot->npages;
11497 hva = slot->userspace_addr;
11498 }
11499
11500 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
11501 struct kvm_userspace_memory_region m;
11502
11503 m.slot = id | (i << 16);
11504 m.flags = 0;
11505 m.guest_phys_addr = gpa;
11506 m.userspace_addr = hva;
11507 m.memory_size = size;
11508 r = __kvm_set_memory_region(kvm, &m);
11509 if (r < 0)
11510 return ERR_PTR_USR(r);
11511 }
11512
11513 if (!size)
11514 vm_munmap(hva, old_npages * PAGE_SIZE);
11515
11516 return (void __user *)hva;
11517}
11518EXPORT_SYMBOL_GPL(__x86_set_memory_region);
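/*
 * Illustrative (hypothetical) usage sketch, mirroring how the teardown path
 * in kvm_arch_destroy_vm() below uses a zero size to delete internal slots:
 *
 *	hva = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT,
 *				      tss_gpa, 3 * PAGE_SIZE);
 *	if (IS_ERR(hva))
 *		return PTR_ERR(hva);
 *	...
 *	__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 *
 * "tss_gpa" is a placeholder chosen for the example, not a value defined in
 * this file.
 */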
11519
11520void kvm_arch_pre_destroy_vm(struct kvm *kvm)
11521{
11522 kvm_mmu_pre_destroy_vm(kvm);
11523}
11524
11525void kvm_arch_destroy_vm(struct kvm *kvm)
11526{
11527 if (current->mm == kvm->mm) {
		/*
		 * Free the internal memslots allocated on behalf of the VM,
		 * but only if this is still the mm that created them;
		 * otherwise the mappings belong to another address space.
		 */
11533 mutex_lock(&kvm->slots_lock);
11534 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
11535 0, 0);
11536 __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
11537 0, 0);
11538 __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
11539 mutex_unlock(&kvm->slots_lock);
11540 }
11541 static_call_cond(kvm_x86_vm_destroy)(kvm);
11542 kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
11543 kvm_pic_destroy(kvm);
11544 kvm_ioapic_destroy(kvm);
11545 kvm_free_vcpus(kvm);
11546 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
11547 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
11548 kvm_mmu_uninit_vm(kvm);
11549 kvm_page_track_cleanup(kvm);
11550 kvm_xen_destroy_vm(kvm);
11551 kvm_hv_destroy_vm(kvm);
11552}
11553
11554static void memslot_rmap_free(struct kvm_memory_slot *slot)
11555{
11556 int i;
11557
11558 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11559 kvfree(slot->arch.rmap[i]);
11560 slot->arch.rmap[i] = NULL;
11561 }
11562}
11563
11564void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
11565{
11566 int i;
11567
11568 memslot_rmap_free(slot);
11569
11570 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11571 kvfree(slot->arch.lpage_info[i - 1]);
11572 slot->arch.lpage_info[i - 1] = NULL;
11573 }
11574
11575 kvm_page_track_free_memslot(slot);
11576}
11577
11578int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
11579{
11580 const int sz = sizeof(*slot->arch.rmap[0]);
11581 int i;
11582
11583 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11584 int level = i + 1;
11585 int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
11586
11587 if (slot->arch.rmap[i])
11588 continue;
11589
11590 slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
11591 if (!slot->arch.rmap[i]) {
11592 memslot_rmap_free(slot);
11593 return -ENOMEM;
11594 }
11595 }
11596
11597 return 0;
11598}
11599
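/*
 * Allocate the arch metadata for a new or moved memslot: the per-level rmap
 * arrays (when rmaps are in use), the large-page tracking info for each
 * hugepage level, and the page-track counters.
 */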
11600static int kvm_alloc_memslot_metadata(struct kvm *kvm,
11601 struct kvm_memory_slot *slot,
11602 unsigned long npages)
11603{
11604 int i, r;
11605
	/*
	 * Start from a clean slate so that, e.g. when a slot is moved, the
	 * new slot doesn't inherit the old slot's rmap and large-page
	 * tracking arrays.
	 */
11611 memset(&slot->arch, 0, sizeof(slot->arch));
11612
11613 if (kvm_memslots_have_rmaps(kvm)) {
11614 r = memslot_rmap_alloc(slot, npages);
11615 if (r)
11616 return r;
11617 }
11618
11619 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11620 struct kvm_lpage_info *linfo;
11621 unsigned long ugfn;
11622 int lpages;
11623 int level = i + 1;
11624
11625 lpages = __kvm_mmu_slot_lpages(slot, npages, level);
11626
11627 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
11628 if (!linfo)
11629 goto out_free;
11630
11631 slot->arch.lpage_info[i - 1] = linfo;
11632
11633 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
11634 linfo[0].disallow_lpage = 1;
11635 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
11636 linfo[lpages - 1].disallow_lpage = 1;
11637 ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * Disallow large pages for the entire slot if the guest and
		 * host mappings are misaligned with respect to each other at
		 * this page size.
		 */
11642 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
11643 unsigned long j;
11644
11645 for (j = 0; j < lpages; ++j)
11646 linfo[j].disallow_lpage = 1;
11647 }
11648 }
11649
11650 if (kvm_page_track_create_memslot(kvm, slot, npages))
11651 goto out_free;
11652
11653 return 0;
11654
11655out_free:
11656 memslot_rmap_free(slot);
11657
11658 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11659 kvfree(slot->arch.lpage_info[i - 1]);
11660 slot->arch.lpage_info[i - 1] = NULL;
11661 }
11662 return -ENOMEM;
11663}
11664
11665void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
11666{
11667 struct kvm_vcpu *vcpu;
11668 int i;
11669
	/*
	 * The memslot generation has changed; invalidate cached MMIO SPTEs so
	 * stale translations based on the old layout aren't reused.
	 */
11674 kvm_mmu_invalidate_mmio_sptes(kvm, gen);

	/* Kick all vCPUs so they observe the new memslot layout. */
11677 kvm_for_each_vcpu(i, vcpu, kvm)
11678 kvm_vcpu_kick(vcpu);
11679}
11680
11681int kvm_arch_prepare_memory_region(struct kvm *kvm,
11682 struct kvm_memory_slot *memslot,
11683 const struct kvm_userspace_memory_region *mem,
11684 enum kvm_mr_change change)
11685{
11686 if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
11687 return kvm_alloc_memslot_metadata(kvm, memslot,
11688 mem->memory_size >> PAGE_SHIFT);
11689 return 0;
11690}
11691
11692
11693static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
11694{
11695 struct kvm_arch *ka = &kvm->arch;
11696
11697 if (!kvm_x86_ops.cpu_dirty_log_size)
11698 return;
11699
11700 if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
11701 (!enable && --ka->cpu_dirty_logging_count == 0))
11702 kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
11703
11704 WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
11705}
11706
11707static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
11708 struct kvm_memory_slot *old,
11709 const struct kvm_memory_slot *new,
11710 enum kvm_mr_change change)
11711{
11712 bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
11713
	/*
	 * Keep the count of memslots using CPU-based dirty logging in sync
	 * when KVM_MEM_LOG_DIRTY_PAGES is being toggled on this slot.
	 */
11718 if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
11719 kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
11720
	/*
	 * Nothing more to do for read-only slots (which can't be dirtied and
	 * can't be made writable) or for CREATE/MOVE/DELETE of a slot: newly
	 * created slots have no shadow pages to write-protect, and the old
	 * mappings of moved or deleted slots are torn down elsewhere.
	 */
11736 if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
11737 return;
11738
	/*
	 * READONLY and non-flags changes were filtered out above, and the
	 * only other flag is LOG_DIRTY_PAGES, i.e. something is off if dirty
	 * logging isn't being toggled at this point.
	 */
11744 if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
11745 return;
11746
11747 if (!log_dirty_pages) {
		/*
		 * Dirty logging forces large mappings to be split into 4K
		 * SPTEs.  Now that dirty logging is being disabled, e.g.
		 * after a cancelled migration, zap the "collapsible" SPTEs so
		 * large mappings can be recreated.
		 */
11761 kvm_mmu_zap_collapsible_sptes(kvm, new);
11762 } else {
		/*
		 * With manual dirty-log protection and the "initially all
		 * set" semantics, write protection is applied lazily on the
		 * first CLEAR_DIRTY_LOG, so there's nothing to do here.
		 */
11767 if (kvm_dirty_log_manual_protect_and_init_set(kvm))
11768 return;
		/*
		 * With CPU-based dirty logging, clear dirty bits on leaf
		 * SPTEs and write-protect only at 2M granularity; otherwise
		 * write-protect 4K SPTEs.
		 */
11770 if (kvm_x86_ops.cpu_dirty_log_size) {
11771 kvm_mmu_slot_leaf_clear_dirty(kvm, new);
11772 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
11773 } else {
11774 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
11775 }
11776 }
11777}
11778
11779void kvm_arch_commit_memory_region(struct kvm *kvm,
11780 const struct kvm_userspace_memory_region *mem,
11781 struct kvm_memory_slot *old,
11782 const struct kvm_memory_slot *new,
11783 enum kvm_mr_change change)
11784{
11785 if (!kvm->arch.n_requested_mmu_pages)
11786 kvm_mmu_change_mmu_pages(kvm,
11787 kvm_mmu_calculate_default_mmu_pages(kvm));
11788
11789 kvm_mmu_slot_apply_flags(kvm, old, new, change);
11790
	/* Free the metadata arrays of the old memslot when it is moved. */
11792 if (change == KVM_MR_MOVE)
11793 kvm_arch_free_memslot(kvm, old);
11794}
11795
11796void kvm_arch_flush_shadow_all(struct kvm *kvm)
11797{
11798 kvm_mmu_zap_all(kvm);
11799}
11800
11801void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
11802 struct kvm_memory_slot *slot)
11803{
11804 kvm_page_track_flush_slot(kvm, slot);
11805}
11806
11807static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
11808{
11809 return (is_guest_mode(vcpu) &&
11810 kvm_x86_ops.guest_apic_has_interrupt &&
11811 static_call(kvm_x86_guest_apic_has_interrupt)(vcpu));
11812}
11813
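/*
 * Returns true if the vCPU has work that should wake it from halt: completed
 * async page faults, pending APIC events, a pending exception, NMI, SMI or
 * injectable interrupt, a pending Hyper-V stimer, or a pending nested timer.
 */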
11814static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
11815{
11816 if (!list_empty_careful(&vcpu->async_pf.done))
11817 return true;
11818
11819 if (kvm_apic_has_events(vcpu))
11820 return true;
11821
11822 if (vcpu->arch.pv.pv_unhalted)
11823 return true;
11824
11825 if (vcpu->arch.exception.pending)
11826 return true;
11827
11828 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
11829 (vcpu->arch.nmi_pending &&
11830 static_call(kvm_x86_nmi_allowed)(vcpu, false)))
11831 return true;
11832
11833 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
11834 (vcpu->arch.smi_pending &&
11835 static_call(kvm_x86_smi_allowed)(vcpu, false)))
11836 return true;
11837
11838 if (kvm_arch_interrupt_allowed(vcpu) &&
11839 (kvm_cpu_has_interrupt(vcpu) ||
11840 kvm_guest_apic_has_interrupt(vcpu)))
11841 return true;
11842
11843 if (kvm_hv_has_stimer_pending(vcpu))
11844 return true;
11845
11846 if (is_guest_mode(vcpu) &&
11847 kvm_x86_ops.nested_ops->hv_timer_pending &&
11848 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
11849 return true;
11850
11851 return false;
11852}
11853
11854int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
11855{
11856 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
11857}
11858
11859bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
11860{
11861 if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
11862 return true;
11863
11864 return false;
11865}
11866
11867bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
11868{
11869 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
11870 return true;
11871
11872 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
11873 kvm_test_request(KVM_REQ_SMI, vcpu) ||
11874 kvm_test_request(KVM_REQ_EVENT, vcpu))
11875 return true;
11876
11877 return kvm_arch_dy_has_pending_interrupt(vcpu);
11878}
11879
11880bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
11881{
11882 if (vcpu->arch.guest_state_protected)
11883 return true;
11884
11885 return vcpu->arch.preempted_in_kernel;
11886}
11887
11888int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
11889{
11890 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
11891}
11892
11893int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
11894{
11895 return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
11896}
11897
11898unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
11899{
	/* Can't read the RIP when guest state is protected, just return 0 */
11901 if (vcpu->arch.guest_state_protected)
11902 return 0;
11903
11904 if (is_64_bit_mode(vcpu))
11905 return kvm_rip_read(vcpu);
11906 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
11907 kvm_rip_read(vcpu));
11908}
11909EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
11910
11911bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
11912{
11913 return kvm_get_linear_rip(vcpu) == linear_rip;
11914}
11915EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
11916
11917unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
11918{
11919 unsigned long rflags;
11920
11921 rflags = static_call(kvm_x86_get_rflags)(vcpu);
11922 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
11923 rflags &= ~X86_EFLAGS_TF;
11924 return rflags;
11925}
11926EXPORT_SYMBOL_GPL(kvm_get_rflags);
11927
11928static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
11929{
11930 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
11931 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
11932 rflags |= X86_EFLAGS_TF;
11933 static_call(kvm_x86_set_rflags)(vcpu, rflags);
11934}
11935
11936void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
11937{
11938 __kvm_set_rflags(vcpu, rflags);
11939 kvm_make_request(KVM_REQ_EVENT, vcpu);
11940}
11941EXPORT_SYMBOL_GPL(kvm_set_rflags);
11942
11943void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
11944{
11945 int r;
11946
11947 if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
11948 work->wakeup_all)
11949 return;
11950
11951 r = kvm_mmu_reload(vcpu);
11952 if (unlikely(r))
11953 return;
11954
11955 if (!vcpu->arch.mmu->direct_map &&
11956 work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
11957 return;
11958
11959 kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
11960}
11961
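/*
 * The async page fault GFN "hash table" below is a small open-addressed
 * table of ASYNC_PF_PER_VCPU entries with linear probing; ~0 marks an empty
 * slot.  It tracks which GFNs currently have an async #PF outstanding.
 */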
11962static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
11963{
11964 BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
11965
11966 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
11967}
11968
11969static inline u32 kvm_async_pf_next_probe(u32 key)
11970{
11971 return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
11972}
11973
11974static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
11975{
11976 u32 key = kvm_async_pf_hash_fn(gfn);
11977
11978 while (vcpu->arch.apf.gfns[key] != ~0)
11979 key = kvm_async_pf_next_probe(key);
11980
11981 vcpu->arch.apf.gfns[key] = gfn;
11982}
11983
11984static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
11985{
11986 int i;
11987 u32 key = kvm_async_pf_hash_fn(gfn);
11988
11989 for (i = 0; i < ASYNC_PF_PER_VCPU &&
11990 (vcpu->arch.apf.gfns[key] != gfn &&
11991 vcpu->arch.apf.gfns[key] != ~0); i++)
11992 key = kvm_async_pf_next_probe(key);
11993
11994 return key;
11995}
11996
11997bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
11998{
11999 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
12000}
12001
12002static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12003{
12004 u32 i, j, k;
12005
12006 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
12007
12008 if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
12009 return;
12010
12011 while (true) {
12012 vcpu->arch.apf.gfns[i] = ~0;
12013 do {
12014 j = kvm_async_pf_next_probe(j);
12015 if (vcpu->arch.apf.gfns[j] == ~0)
12016 return;
12017 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
12018
			/*
			 * Keep probing while the entry's home slot k lies
			 * cyclically in (i, j]; once it doesn't, the entry at
			 * j can be moved into the hole at i without breaking
			 * the probe chain.
			 */
12023 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
12024 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
12025 i = j;
12026 }
12027}
12028
12029static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
12030{
12031 u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
12032
12033 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
12034 sizeof(reason));
12035}
12036
12037static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
12038{
12039 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
12040
12041 return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
12042 &token, offset, sizeof(token));
12043}
12044
12045static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
12046{
12047 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
12048 u32 val;
12049
12050 if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
12051 &val, offset, sizeof(val)))
12052 return false;
12053
12054 return !val;
12055}
12056
12057static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
12058{
12059 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
12060 return false;
12061
12062 if (!kvm_pv_async_pf_enabled(vcpu) ||
12063 (vcpu->arch.apf.send_user_only && static_call(kvm_x86_get_cpl)(vcpu) == 0))
12064 return false;
12065
12066 return true;
12067}
12068
12069bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
12070{
12071 if (unlikely(!lapic_in_kernel(vcpu) ||
12072 kvm_event_needs_reinjection(vcpu) ||
12073 vcpu->arch.exception.pending))
12074 return false;
12075
12076 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
12077 return false;
12078
	/*
	 * Finally, the async #PF (or the artificial halt used in its place)
	 * can only be delivered if the vCPU can take interrupts.
	 */
12083 return kvm_arch_interrupt_allowed(vcpu);
12084}
12085
12086bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
12087 struct kvm_async_pf *work)
12088{
12089 struct x86_exception fault;
12090
12091 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
12092 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
12093
12094 if (kvm_can_deliver_async_pf(vcpu) &&
12095 !apf_put_user_notpresent(vcpu)) {
12096 fault.vector = PF_VECTOR;
12097 fault.error_code_valid = true;
12098 fault.error_code = 0;
12099 fault.nested_page_fault = false;
12100 fault.address = work->arch.token;
12101 fault.async_page_fault = true;
12102 kvm_inject_page_fault(vcpu, &fault);
12103 return true;
12104 } else {
		/*
		 * The paravirt async #PF can't be delivered; put the vCPU
		 * into an artificial halt instead.  If an interrupt arrives
		 * it can still be handled, and by the time the faulting
		 * instruction is retried the page may be ready in the host.
		 */
12113 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
12114 return false;
12115 }
12116}
12117
12118void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
12119 struct kvm_async_pf *work)
12120{
12121 struct kvm_lapic_irq irq = {
12122 .delivery_mode = APIC_DM_FIXED,
12123 .vector = vcpu->arch.apf.vec
12124 };
12125
12126 if (work->wakeup_all)
12127 work->arch.token = ~0;
12128 else
12129 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
12130 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
12131
12132 if ((work->wakeup_all || work->notpresent_injected) &&
12133 kvm_pv_async_pf_enabled(vcpu) &&
12134 !apf_put_user_ready(vcpu, work->arch.token)) {
12135 vcpu->arch.apf.pageready_pending = true;
12136 kvm_apic_set_irq(vcpu, &irq, NULL);
12137 }
12138
12139 vcpu->arch.apf.halted = false;
12140 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
12141}
12142
12143void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
12144{
12145 kvm_make_request(KVM_REQ_APF_READY, vcpu);
12146 if (!vcpu->arch.apf.pageready_pending)
12147 kvm_vcpu_kick(vcpu);
12148}
12149
12150bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
12151{
12152 if (!kvm_pv_async_pf_enabled(vcpu))
12153 return true;
12154 else
12155 return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu);
12156}
12157
12158void kvm_arch_start_assignment(struct kvm *kvm)
12159{
12160 if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
12161 static_call_cond(kvm_x86_start_assignment)(kvm);
12162}
12163EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
12164
12165void kvm_arch_end_assignment(struct kvm *kvm)
12166{
12167 atomic_dec(&kvm->arch.assigned_device_count);
12168}
12169EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
12170
12171bool kvm_arch_has_assigned_device(struct kvm *kvm)
12172{
12173 return atomic_read(&kvm->arch.assigned_device_count);
12174}
12175EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
12176
12177void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
12178{
12179 atomic_inc(&kvm->arch.noncoherent_dma_count);
12180}
12181EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
12182
12183void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
12184{
12185 atomic_dec(&kvm->arch.noncoherent_dma_count);
12186}
12187EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
12188
12189bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
12190{
12191 return atomic_read(&kvm->arch.noncoherent_dma_count);
12192}
12193EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
12194
12195bool kvm_arch_has_irq_bypass(void)
12196{
12197 return true;
12198}
12199
12200int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
12201 struct irq_bypass_producer *prod)
12202{
12203 struct kvm_kernel_irqfd *irqfd =
12204 container_of(cons, struct kvm_kernel_irqfd, consumer);
12205 int ret;
12206
12207 irqfd->producer = prod;
12208 kvm_arch_start_assignment(irqfd->kvm);
12209 ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm,
12210 prod->irq, irqfd->gsi, 1);
12211
12212 if (ret)
12213 kvm_arch_end_assignment(irqfd->kvm);
12214
12215 return ret;
12216}
12217
12218void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
12219 struct irq_bypass_producer *prod)
12220{
12221 int ret;
12222 struct kvm_kernel_irqfd *irqfd =
12223 container_of(cons, struct kvm_kernel_irqfd, consumer);
12224
12225 WARN_ON(irqfd->producer != prod);
12226 irqfd->producer = NULL;
12227
	/*
	 * The producer is going away, so switch the interrupt back to
	 * remapped (non-posted) delivery.  A failure here isn't fatal; the
	 * interrupt will simply take the slower remapped path.
	 */
12234 ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0);
12235 if (ret)
12236 printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
12237 " fails: %d\n", irqfd->consumer.token, ret);
12238
12239 kvm_arch_end_assignment(irqfd->kvm);
12240}
12241
12242int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
12243 uint32_t guest_irq, bool set)
12244{
12245 return static_call(kvm_x86_update_pi_irte)(kvm, host_irq, guest_irq, set);
12246}
12247
12248bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
12249 struct kvm_kernel_irq_routing_entry *new)
12250{
12251 if (new->type != KVM_IRQ_ROUTING_MSI)
12252 return true;
12253
12254 return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
12255}
12256
12257bool kvm_vector_hashing_enabled(void)
12258{
12259 return vector_hashing;
12260}
12261
12262bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
12263{
12264 return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
12265}
12266EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
12267
12268
12269int kvm_spec_ctrl_test_value(u64 value)
12270{
	/*
	 * Test whether the host processor accepts writing @value to
	 * IA32_SPEC_CTRL, restoring the original value afterwards.
	 */
12276 u64 saved_value;
12277 unsigned long flags;
12278 int ret = 0;
12279
12280 local_irq_save(flags);
12281
12282 if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
12283 ret = 1;
12284 else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
12285 ret = 1;
12286 else
12287 wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
12288
12289 local_irq_restore(flags);
12290
12291 return ret;
12292}
12293EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
12294
12295void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
12296{
12297 struct x86_exception fault;
12298 u32 access = error_code &
12299 (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
12300
12301 if (!(error_code & PFERR_PRESENT_MASK) ||
12302 vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
		/*
		 * Either the fault was for a non-present page or the GVA->GPA
		 * walk succeeded (in which case the guest page tables likely
		 * don't match the TLB); build the fault from the error code
		 * and address the caller provided.  Otherwise @fault was
		 * already filled in by gva_to_gpa().
		 */
12308 fault.vector = PF_VECTOR;
12309 fault.error_code_valid = true;
12310 fault.error_code = error_code;
12311 fault.nested_page_fault = false;
12312 fault.address = gva;
12313 }
12314 vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
12315}
12316EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
12317
/*
 * Handle the result of kvm_read/write_guest_virt*(): inject the page fault
 * and resume the guest (return 1) on a propagated fault, otherwise report an
 * emulation error to userspace (return 0).
 */
12323int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
12324 struct x86_exception *e)
12325{
12326 if (r == X86EMUL_PROPAGATE_FAULT) {
12327 kvm_inject_emulated_page_fault(vcpu, e);
12328 return 1;
12329 }
12330
	/*
	 * Any other failure could in principle be completed by exiting to
	 * userspace, but there's no compelling use case, so simply report an
	 * emulation error.
	 */
12338 kvm_prepare_emulation_failure_exit(vcpu);
12339
12340 return 0;
12341}
12342EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
12343
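/*
 * Common INVPCID emulation: read the 16-byte {pcid, gla} descriptor from
 * guest memory, validate it against CR4.PCIDE and canonicality rules, and
 * dispatch on the INVPCID type, injecting #GP for invalid combinations.
 */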
12344int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
12345{
12346 bool pcid_enabled;
12347 struct x86_exception e;
12348 struct {
12349 u64 pcid;
12350 u64 gla;
12351 } operand;
12352 int r;
12353
12354 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
12355 if (r != X86EMUL_CONTINUE)
12356 return kvm_handle_memory_failure(vcpu, r, &e);
12357
12358 if (operand.pcid >> 12 != 0) {
12359 kvm_inject_gp(vcpu, 0);
12360 return 1;
12361 }
12362
12363 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
12364
12365 switch (type) {
12366 case INVPCID_TYPE_INDIV_ADDR:
12367 if ((!pcid_enabled && (operand.pcid != 0)) ||
12368 is_noncanonical_address(operand.gla, vcpu)) {
12369 kvm_inject_gp(vcpu, 0);
12370 return 1;
12371 }
12372 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
12373 return kvm_skip_emulated_instruction(vcpu);
12374
12375 case INVPCID_TYPE_SINGLE_CTXT:
12376 if (!pcid_enabled && (operand.pcid != 0)) {
12377 kvm_inject_gp(vcpu, 0);
12378 return 1;
12379 }
12380
12381 kvm_invalidate_pcid(vcpu, operand.pcid);
12382 return kvm_skip_emulated_instruction(vcpu);
12383
12384 case INVPCID_TYPE_ALL_NON_GLOBAL:
		/*
		 * KVM doesn't track global entries in its paging structures,
		 * so a non-global flush degenerates into a global flush.
		 */
12392 fallthrough;
12393 case INVPCID_TYPE_ALL_INCL_GLOBAL:
12394 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
12395 return kvm_skip_emulated_instruction(vcpu);
12396
12397 default:
12398 kvm_inject_gp(vcpu, 0);
12399 return 1;
12400 }
12401}
12402EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
12403
12404static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
12405{
12406 struct kvm_run *run = vcpu->run;
12407 struct kvm_mmio_fragment *frag;
12408 unsigned int len;
12409
12410 BUG_ON(!vcpu->mmio_needed);
12411
	/* Complete the previous (in-flight) fragment. */
12413 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
12414 len = min(8u, frag->len);
12415 if (!vcpu->mmio_is_write)
12416 memcpy(frag->data, run->mmio.data, len);
12417
12418 if (frag->len <= 8) {
		/* The fragment is done; switch to the next one. */
12420 frag++;
12421 vcpu->mmio_cur_fragment++;
12422 } else {
		/* Advance within the current (oversized) fragment. */
12424 frag->data += len;
12425 frag->gpa += len;
12426 frag->len -= len;
12427 }
12428
12429 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
12430 vcpu->mmio_needed = 0;
		/* All fragments have been processed. */
12434 return 1;
12435 }
12436
	/* Set up the userspace exit for the next fragment. */
12438 run->mmio.phys_addr = frag->gpa;
12439 run->mmio.len = min(8u, frag->len);
12440 run->mmio.is_write = vcpu->mmio_is_write;
12441 if (run->mmio.is_write)
12442 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
12443 run->exit_reason = KVM_EXIT_MMIO;
12444
12445 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12446
12447 return 0;
12448}
12449
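/*
 * MMIO for SEV-ES guests: the unencrypted data buffer is supplied by the
 * vendor code, anything not handled in the kernel is exposed to userspace as
 * a single fragment, and fragments larger than 8 bytes are completed across
 * multiple KVM_EXIT_MMIO round trips via complete_sev_es_emulated_mmio().
 */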
12450int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
12451 void *data)
12452{
12453 int handled;
12454 struct kvm_mmio_fragment *frag;
12455
12456 if (!data)
12457 return -EINVAL;
12458
12459 handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
12460 if (handled == bytes)
12461 return 1;
12462
12463 bytes -= handled;
12464 gpa += handled;
12465 data += handled;
12466
	/* Hand the remaining bytes to userspace as a single MMIO fragment. */
12468 frag = vcpu->mmio_fragments;
12469 vcpu->mmio_nr_fragments = 1;
12470 frag->len = bytes;
12471 frag->gpa = gpa;
12472 frag->data = data;
12473
12474 vcpu->mmio_needed = 1;
12475 vcpu->mmio_cur_fragment = 0;
12476
12477 vcpu->run->mmio.phys_addr = gpa;
12478 vcpu->run->mmio.len = min(8u, frag->len);
12479 vcpu->run->mmio.is_write = 1;
12480 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
12481 vcpu->run->exit_reason = KVM_EXIT_MMIO;
12482
12483 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12484
12485 return 0;
12486}
12487EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
12488
12489int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
12490 void *data)
12491{
12492 int handled;
12493 struct kvm_mmio_fragment *frag;
12494
12495 if (!data)
12496 return -EINVAL;
12497
12498 handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
12499 if (handled == bytes)
12500 return 1;
12501
12502 bytes -= handled;
12503 gpa += handled;
12504 data += handled;
12505
	/* Hand the remaining bytes to userspace as a single MMIO fragment. */
12507 frag = vcpu->mmio_fragments;
12508 vcpu->mmio_nr_fragments = 1;
12509 frag->len = bytes;
12510 frag->gpa = gpa;
12511 frag->data = data;
12512
12513 vcpu->mmio_needed = 1;
12514 vcpu->mmio_cur_fragment = 0;
12515
12516 vcpu->run->mmio.phys_addr = gpa;
12517 vcpu->run->mmio.len = min(8u, frag->len);
12518 vcpu->run->mmio.is_write = 0;
12519 vcpu->run->exit_reason = KVM_EXIT_MMIO;
12520
12521 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12522
12523 return 0;
12524}
12525EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
12526
12527static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
12528 unsigned int port);
12529
12530static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
12531{
12532 int size = vcpu->arch.pio.size;
12533 int port = vcpu->arch.pio.port;
12534
12535 vcpu->arch.pio.count = 0;
12536 if (vcpu->arch.sev_pio_count)
12537 return kvm_sev_es_outs(vcpu, size, port);
12538 return 1;
12539}
12540
12541static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
12542 unsigned int port)
12543{
12544 for (;;) {
12545 unsigned int count =
12546 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
12547 int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);

		/* The data was already copied out by emulator_pio_out(). */
12550 vcpu->arch.sev_pio_count -= count;
12551 vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
12552 if (!ret)
12553 break;

		/* The kernel handled the I/O; no userspace exit is needed. */
12556 if (!vcpu->arch.sev_pio_count)
12557 return 1;
12558 }
12559
12560 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
12561 return 0;
12562}
12563
12564static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
12565 unsigned int port);
12566
12567static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
12568{
12569 unsigned count = vcpu->arch.pio.count;
12570 complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
12571 vcpu->arch.sev_pio_count -= count;
12572 vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
12573}
12574
12575static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
12576{
12577 int size = vcpu->arch.pio.size;
12578 int port = vcpu->arch.pio.port;
12579
12580 advance_sev_es_emulated_ins(vcpu);
12581 if (vcpu->arch.sev_pio_count)
12582 return kvm_sev_es_ins(vcpu, size, port);
12583 return 1;
12584}
12585
12586static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
12587 unsigned int port)
12588{
12589 for (;;) {
12590 unsigned int count =
12591 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
12592 if (!__emulator_pio_in(vcpu, size, port, count))
12593 break;

		/* The kernel completed the read; copy the data to the guest buffer. */
12596 advance_sev_es_emulated_ins(vcpu);
12597 if (!vcpu->arch.sev_pio_count)
12598 return 1;
12599 }
12600
12601 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
12602 return 0;
12603}
12604
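/*
 * String I/O for SEV-ES guests: @data points at the shared buffer provided
 * by the vendor code and @count is the number of elements of @size bytes.
 * The helpers above emulate as much as possible in the kernel, chunking the
 * transfer into at most PAGE_SIZE-sized pieces, and fall back to userspace
 * exits for whatever the kernel can't handle.
 */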
12605int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
12606 unsigned int port, void *data, unsigned int count,
12607 int in)
12608{
12609 vcpu->arch.sev_pio_data = data;
12610 vcpu->arch.sev_pio_count = count;
12611 return in ? kvm_sev_es_ins(vcpu, size, port)
12612 : kvm_sev_es_outs(vcpu, size, port);
12613}
12614EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
12615
12616EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
12617EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
12618EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
12619EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
12620EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
12621EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
12622EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
12623EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
12624EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
12625EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
12626EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
12627EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
12628EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
12629EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
12630EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
12631EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
12632EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
12633EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
12634EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
12635EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
12636EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
12637EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
12638EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
12639EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
12640EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
12641EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
12642EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
12643