#include <linux/kvm_host.h>
#include "irq.h"
#include "ioapic.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
#include "hyperv.h"
#include "lapic.h"
#include "xen.h"
33
#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <linux/entry-kvm.h>
#include <linux/suspend.h>
62
#include <trace/events/kvm.h>

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mce.h>
#include <asm/pkru.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/tlbflush.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <asm/sgx.h>
#include <clocksource/hyperv_timer.h>

#define CREATE_TRACE_POINTS
#include "trace.h"
85
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32

u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);

#define emul_to_vcpu(ctxt) \
	((struct kvm_vcpu *)(ctxt)->vcpu)
93
94
95
96
97
#ifdef CONFIG_X86_64
static u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;

#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)

#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
				    KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
111
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);

static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);

struct kvm_x86_ops kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

#define KVM_X86_OP(func)					\
	DEFINE_STATIC_CALL_NULL(kvm_x86_##func,			\
				*(((struct kvm_x86_ops *)0)->func));
#define KVM_X86_OP_NULL KVM_X86_OP
#include <asm/kvm-x86-ops.h>
EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
EXPORT_STATIC_CALL_GPL(kvm_x86_tlb_flush_current);
134
static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL_GPL(report_ignored_msrs);

unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO);

bool __read_mostly kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 __read_mostly kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
u64 __read_mostly kvm_max_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
u64 __read_mostly kvm_default_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
bool __read_mostly kvm_has_bus_lock_exit;
EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);

static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
164
165
166
167
168
169
170
static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);

static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);

bool __read_mostly enable_vmware_backdoor = false;
module_param(enable_vmware_backdoor, bool, S_IRUGO);
EXPORT_SYMBOL_GPL(enable_vmware_backdoor);

static bool __read_mostly force_emulation_prefix = false;
module_param(force_emulation_prefix, bool, S_IRUGO);

int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
186
187
188
189
190
191
#define KVM_MAX_NR_USER_RETURN_MSRS 16

struct kvm_user_return_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_user_return_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_MAX_NR_USER_RETURN_MSRS];
};

u32 __read_mostly kvm_nr_uret_msrs;
EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
static struct kvm_user_return_msrs __percpu *user_return_msrs;

#define KVM_SUPPORTED_XCR0	(XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
				| XFEATURE_MASK_PKRU)

u64 __read_mostly host_efer;
EXPORT_SYMBOL_GPL(host_efer);

bool __read_mostly allow_smaller_maxphyaddr = 0;
EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);

bool __read_mostly enable_apicv = true;
EXPORT_SYMBOL_GPL(enable_apicv);

u64 __read_mostly host_xss;
EXPORT_SYMBOL_GPL(host_xss);
u64 __read_mostly supported_xss;
EXPORT_SYMBOL_GPL(supported_xss);
226
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
	STATS_DESC_COUNTER(VM, mmu_pte_write),
	STATS_DESC_COUNTER(VM, mmu_pde_zapped),
	STATS_DESC_COUNTER(VM, mmu_flooded),
	STATS_DESC_COUNTER(VM, mmu_recycled),
	STATS_DESC_COUNTER(VM, mmu_cache_miss),
	STATS_DESC_ICOUNTER(VM, mmu_unsync),
	STATS_DESC_ICOUNTER(VM, pages_4k),
	STATS_DESC_ICOUNTER(VM, pages_2m),
	STATS_DESC_ICOUNTER(VM, pages_1g),
	STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
	STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size),
	STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
252
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, pf_fixed),
	STATS_DESC_COUNTER(VCPU, pf_guest),
	STATS_DESC_COUNTER(VCPU, tlb_flush),
	STATS_DESC_COUNTER(VCPU, invlpg),
	STATS_DESC_COUNTER(VCPU, exits),
	STATS_DESC_COUNTER(VCPU, io_exits),
	STATS_DESC_COUNTER(VCPU, mmio_exits),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, irq_window_exits),
	STATS_DESC_COUNTER(VCPU, nmi_window_exits),
	STATS_DESC_COUNTER(VCPU, l1d_flush),
	STATS_DESC_COUNTER(VCPU, halt_exits),
	STATS_DESC_COUNTER(VCPU, request_irq_exits),
	STATS_DESC_COUNTER(VCPU, irq_exits),
	STATS_DESC_COUNTER(VCPU, host_state_reload),
	STATS_DESC_COUNTER(VCPU, fpu_reload),
	STATS_DESC_COUNTER(VCPU, insn_emulation),
	STATS_DESC_COUNTER(VCPU, insn_emulation_fail),
	STATS_DESC_COUNTER(VCPU, hypercalls),
	STATS_DESC_COUNTER(VCPU, irq_injections),
	STATS_DESC_COUNTER(VCPU, nmi_injections),
	STATS_DESC_COUNTER(VCPU, req_event),
	STATS_DESC_COUNTER(VCPU, nested_run),
	STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
	STATS_DESC_ICOUNTER(VCPU, guest_mode)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
291
u64 __read_mostly host_xcr0;
u64 __read_mostly supported_xcr0;
EXPORT_SYMBOL_GPL(supported_xcr0);

static struct kmem_cache *x86_fpu_cache;

static struct kmem_cache *x86_emulator_cache;
299
300
301
302
303
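/*
 * Decide how to treat an unhandled or invalid MSR access: if the ignore_msrs
 * module parameter is set, the access is silently ignored (and optionally
 * logged via report_ignored_msrs) and this returns true; otherwise it is
 * reported rate-limited and the caller should fail the access.
 */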
304static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
305{
306 const char *op = write ? "wrmsr" : "rdmsr";
307
308 if (ignore_msrs) {
309 if (report_ignored_msrs)
310 kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
311 op, msr, data);
312
313 return true;
314 } else {
315 kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
316 op, msr, data);
317 return false;
318 }
319}
320
321static struct kmem_cache *kvm_alloc_emulator_cache(void)
322{
323 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
324 unsigned int size = sizeof(struct x86_emulate_ctxt);
325
326 return kmem_cache_create_usercopy("x86_emulator", size,
327 __alignof__(struct x86_emulate_ctxt),
328 SLAB_ACCOUNT, useroffset,
329 size - useroffset, NULL);
330}
331
332static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
333
334static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
335{
336 int i;
337 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
338 vcpu->arch.apf.gfns[i] = ~0;
339}
340
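/*
 * User-return notifier callback: restore the host values of any user-return
 * MSRs that were switched to guest values while a vCPU ran on this CPU.
 * Interrupts are disabled around the unregister step because this function
 * can also be reached directly via drop_user_return_notifiers().
 */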
341static void kvm_on_user_return(struct user_return_notifier *urn)
342{
343 unsigned slot;
344 struct kvm_user_return_msrs *msrs
345 = container_of(urn, struct kvm_user_return_msrs, urn);
346 struct kvm_user_return_msr_values *values;
347 unsigned long flags;
348
349
350
351
352
353 local_irq_save(flags);
354 if (msrs->registered) {
355 msrs->registered = false;
356 user_return_notifier_unregister(urn);
357 }
358 local_irq_restore(flags);
359 for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
360 values = &msrs->values[slot];
361 if (values->host != values->curr) {
362 wrmsrl(kvm_uret_msrs_list[slot], values->host);
363 values->curr = values->host;
364 }
365 }
366}
367
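/*
 * Probe whether the MSR actually exists on this host by reading it and
 * writing the same value back; a failure means it cannot be used as a
 * user-return MSR.
 */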
368static int kvm_probe_user_return_msr(u32 msr)
369{
370 u64 val;
371 int ret;
372
373 preempt_disable();
374 ret = rdmsrl_safe(msr, &val);
375 if (ret)
376 goto out;
377 ret = wrmsrl_safe(msr, val);
378out:
379 preempt_enable();
380 return ret;
381}
382
383int kvm_add_user_return_msr(u32 msr)
384{
385 BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
386
387 if (kvm_probe_user_return_msr(msr))
388 return -1;
389
390 kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
391 return kvm_nr_uret_msrs++;
392}
393EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
394
395int kvm_find_user_return_msr(u32 msr)
396{
397 int i;
398
399 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
400 if (kvm_uret_msrs_list[i] == msr)
401 return i;
402 }
403 return -1;
404}
405EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
406
407static void kvm_user_return_msr_cpu_online(void)
408{
409 unsigned int cpu = smp_processor_id();
410 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
411 u64 value;
412 int i;
413
414 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
415 rdmsrl_safe(kvm_uret_msrs_list[i], &value);
416 msrs->values[i].host = value;
417 msrs->values[i].curr = value;
418 }
419}
420
421int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
422{
423 unsigned int cpu = smp_processor_id();
424 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
425 int err;
426
427 value = (value & mask) | (msrs->values[slot].host & ~mask);
428 if (value == msrs->values[slot].curr)
429 return 0;
430 err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
431 if (err)
432 return 1;
433
434 msrs->values[slot].curr = value;
435 if (!msrs->registered) {
436 msrs->urn.on_user_return = kvm_on_user_return;
437 user_return_notifier_register(&msrs->urn);
438 msrs->registered = true;
439 }
440 return 0;
441}
442EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
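/*
 * Typical use of the user-return MSR machinery by vendor code (a rough
 * sketch, not taken from this file): register the MSR once during hardware
 * setup and load the guest value on the vCPU-run path, e.g.
 *
 *	// the returned slot index identifies the MSR from now on
 *	int slot = kvm_add_user_return_msr(MSR_STAR);
 *
 *	// before entering the guest
 *	kvm_set_user_return_msr(slot, guest_val, ~0ull);
 *
 * The host value is restored lazily by kvm_on_user_return() when the CPU
 * returns to userspace.
 */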
443
444static void drop_user_return_notifiers(void)
445{
446 unsigned int cpu = smp_processor_id();
447 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
448
449 if (msrs->registered)
450 kvm_on_user_return(&msrs->urn);
451}
452
453u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
454{
455 return vcpu->arch.apic_base;
456}
457EXPORT_SYMBOL_GPL(kvm_get_apic_base);
458
459enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
460{
461 return kvm_apic_mode(kvm_get_apic_base(vcpu));
462}
463EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
464
465int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
466{
467 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
468 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
469 u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
470 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
471
472 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
473 return 1;
474 if (!msr_info->host_initiated) {
475 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
476 return 1;
477 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
478 return 1;
479 }
480
481 kvm_lapic_set_base(vcpu, msr_info->data);
482 kvm_recalculate_apic_map(vcpu->kvm);
483 return 0;
484}
485EXPORT_SYMBOL_GPL(kvm_set_apic_base);
486
487
488
489
490
491
492
493
494noinstr void kvm_spurious_fault(void)
495{
496
497 BUG_ON(!kvm_rebooting);
498}
499EXPORT_SYMBOL_GPL(kvm_spurious_fault);
500
501#define EXCPT_BENIGN 0
502#define EXCPT_CONTRIBUTORY 1
503#define EXCPT_PF 2
504
505static int exception_class(int vector)
506{
507 switch (vector) {
508 case PF_VECTOR:
509 return EXCPT_PF;
510 case DE_VECTOR:
511 case TS_VECTOR:
512 case NP_VECTOR:
513 case SS_VECTOR:
514 case GP_VECTOR:
515 return EXCPT_CONTRIBUTORY;
516 default:
517 break;
518 }
519 return EXCPT_BENIGN;
520}
521
522#define EXCPT_FAULT 0
523#define EXCPT_TRAP 1
524#define EXCPT_ABORT 2
525#define EXCPT_INTERRUPT 3
526
527static int exception_type(int vector)
528{
529 unsigned int mask;
530
531 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
532 return EXCPT_INTERRUPT;
533
534 mask = 1 << vector;
535
536
537 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
538 return EXCPT_TRAP;
539
540 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
541 return EXCPT_ABORT;
542
543
544 return EXCPT_FAULT;
545}
546
547void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
548{
549 unsigned nr = vcpu->arch.exception.nr;
550 bool has_payload = vcpu->arch.exception.has_payload;
551 unsigned long payload = vcpu->arch.exception.payload;
552
553 if (!has_payload)
554 return;
555
556 switch (nr) {
557 case DB_VECTOR:
558
559
560
561
562
563 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
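		/*
		 * The payload uses "positive" polarity (bit set == condition
		 * asserted), while several DR6 bits are defined active-low.
		 * Set the active-low bits first, OR in the payload, then XOR
		 * with the active-low payload bits so asserted active-low
		 * conditions end up as 0 in DR6.
		 */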
580 vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
581 vcpu->arch.dr6 |= payload;
582 vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
583
584
585
586
587
588
589
590 vcpu->arch.dr6 &= ~BIT(12);
591 break;
592 case PF_VECTOR:
593 vcpu->arch.cr2 = payload;
594 break;
595 }
596
597 vcpu->arch.exception.has_payload = false;
598 vcpu->arch.exception.payload = 0;
599}
600EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
601
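/*
 * Queue an exception for delivery to the guest, handling the case where an
 * exception is already pending: a contributory exception following another
 * contributory exception, or any non-benign exception following a page
 * fault, is promoted to #DF, and an exception arriving while #DF is pending
 * triggers a triple-fault (shutdown) request instead.
 */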
602static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
603 unsigned nr, bool has_error, u32 error_code,
604 bool has_payload, unsigned long payload, bool reinject)
605{
606 u32 prev_nr;
607 int class1, class2;
608
609 kvm_make_request(KVM_REQ_EVENT, vcpu);
610
611 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
612 queue:
613 if (reinject) {
614
615
616
617
618
619
620
621
622 WARN_ON_ONCE(vcpu->arch.exception.pending);
623 vcpu->arch.exception.injected = true;
624 if (WARN_ON_ONCE(has_payload)) {
625
626
627
628
629 has_payload = false;
630 payload = 0;
631 }
632 } else {
633 vcpu->arch.exception.pending = true;
634 vcpu->arch.exception.injected = false;
635 }
636 vcpu->arch.exception.has_error_code = has_error;
637 vcpu->arch.exception.nr = nr;
638 vcpu->arch.exception.error_code = error_code;
639 vcpu->arch.exception.has_payload = has_payload;
640 vcpu->arch.exception.payload = payload;
641 if (!is_guest_mode(vcpu))
642 kvm_deliver_exception_payload(vcpu);
643 return;
644 }
645
646
647 prev_nr = vcpu->arch.exception.nr;
648 if (prev_nr == DF_VECTOR) {
649
650 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
651 return;
652 }
653 class1 = exception_class(prev_nr);
654 class2 = exception_class(nr);
655 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
656 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
657
658
659
660
661
662 vcpu->arch.exception.pending = true;
663 vcpu->arch.exception.injected = false;
664 vcpu->arch.exception.has_error_code = true;
665 vcpu->arch.exception.nr = DF_VECTOR;
666 vcpu->arch.exception.error_code = 0;
667 vcpu->arch.exception.has_payload = false;
668 vcpu->arch.exception.payload = 0;
	} else
		/*
		 * Replace the previous exception with the new one, in the
		 * hope that instruction re-execution will regenerate the
		 * lost exception.
		 */
		goto queue;
674}
675
676void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
677{
678 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
679}
680EXPORT_SYMBOL_GPL(kvm_queue_exception);
681
682void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
683{
684 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
685}
686EXPORT_SYMBOL_GPL(kvm_requeue_exception);
687
688void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
689 unsigned long payload)
690{
691 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
692}
693EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
694
695static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
696 u32 error_code, unsigned long payload)
697{
698 kvm_multiple_exception(vcpu, nr, true, error_code,
699 true, payload, false);
700}
701
702int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
703{
704 if (err)
705 kvm_inject_gp(vcpu, 0);
706 else
707 return kvm_skip_emulated_instruction(vcpu);
708
709 return 1;
710}
711EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
712
713void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
714{
715 ++vcpu->stat.pf_guest;
716 vcpu->arch.exception.nested_apf =
717 is_guest_mode(vcpu) && fault->async_page_fault;
718 if (vcpu->arch.exception.nested_apf) {
719 vcpu->arch.apf.nested_apf_token = fault->address;
720 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
721 } else {
722 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
723 fault->address);
724 }
725}
726EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
727
728bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
729 struct x86_exception *fault)
730{
731 struct kvm_mmu *fault_mmu;
732 WARN_ON_ONCE(fault->vector != PF_VECTOR);
733
734 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
735 vcpu->arch.walk_mmu;
736
737
738
739
740
741 if ((fault->error_code & PFERR_PRESENT_MASK) &&
742 !(fault->error_code & PFERR_RSVD_MASK))
743 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
744 fault_mmu->root_hpa);
745
746 fault_mmu->inject_page_fault(vcpu, fault);
747 return fault->nested_page_fault;
748}
749EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
750
751void kvm_inject_nmi(struct kvm_vcpu *vcpu)
752{
753 atomic_inc(&vcpu->arch.nmi_queued);
754 kvm_make_request(KVM_REQ_NMI, vcpu);
755}
756EXPORT_SYMBOL_GPL(kvm_inject_nmi);
757
758void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
759{
760 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
761}
762EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
763
764void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
765{
766 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
767}
768EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
769
770
771
772
773
774bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
775{
776 if (static_call(kvm_x86_get_cpl)(vcpu) <= required_cpl)
777 return true;
778 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
779 return false;
780}
781EXPORT_SYMBOL_GPL(kvm_require_cpl);
782
783bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
784{
785 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
786 return true;
787
788 kvm_queue_exception(vcpu, UD_VECTOR);
789 return false;
790}
791EXPORT_SYMBOL_GPL(kvm_require_dr);
792
793
794
795
796
797
798int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
799 gfn_t ngfn, void *data, int offset, int len,
800 u32 access)
801{
802 struct x86_exception exception;
803 gfn_t real_gfn;
804 gpa_t ngpa;
805
806 ngpa = gfn_to_gpa(ngfn);
807 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
808 if (real_gfn == UNMAPPED_GVA)
809 return -EFAULT;
810
811 real_gfn = gpa_to_gfn(real_gfn);
812
813 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
814}
815EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
816
817static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
818{
819 return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
820}
821
822
823
824
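/*
 * Load the four PAE page-directory-pointer-table entries referenced by CR3
 * into the MMU cache.  Returns 1 if they were loaded successfully, 0 if a
 * PDPTE could not be read or has reserved bits set.
 */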
825int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
826{
827 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
828 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
829 int i;
830 int ret;
831 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
832
833 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
834 offset * sizeof(u64), sizeof(pdpte),
835 PFERR_USER_MASK|PFERR_WRITE_MASK);
836 if (ret < 0) {
837 ret = 0;
838 goto out;
839 }
840 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
841 if ((pdpte[i] & PT_PRESENT_MASK) &&
842 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
843 ret = 0;
844 goto out;
845 }
846 }
847 ret = 1;
848
849 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
850 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
851 vcpu->arch.pdptrs_from_userspace = false;
852
853out:
854
855 return ret;
856}
857EXPORT_SYMBOL_GPL(load_pdptrs);
858
859void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
860{
861 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
862 kvm_clear_async_pf_completion_queue(vcpu);
863 kvm_async_pf_hash_reset(vcpu);
864 }
865
866 if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
867 kvm_mmu_reset_context(vcpu);
868
869 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
870 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
871 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
872 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
873}
874EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
875
876int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
877{
878 unsigned long old_cr0 = kvm_read_cr0(vcpu);
879 unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
880
881 cr0 |= X86_CR0_ET;
882
883#ifdef CONFIG_X86_64
884 if (cr0 & 0xffffffff00000000UL)
885 return 1;
886#endif
887
888 cr0 &= ~CR0_RESERVED_BITS;
889
890 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
891 return 1;
892
893 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
894 return 1;
895
896#ifdef CONFIG_X86_64
897 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
898 (cr0 & X86_CR0_PG)) {
899 int cs_db, cs_l;
900
901 if (!is_pae(vcpu))
902 return 1;
903 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
904 if (cs_l)
905 return 1;
906 }
907#endif
908 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
909 is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
910 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
911 return 1;
912
913 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
914 return 1;
915
916 static_call(kvm_x86_set_cr0)(vcpu, cr0);
917
918 kvm_post_set_cr0(vcpu, old_cr0, cr0);
919
920 return 0;
921}
922EXPORT_SYMBOL_GPL(kvm_set_cr0);
923
924void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
925{
926 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
927}
928EXPORT_SYMBOL_GPL(kvm_lmsw);
929
930void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
931{
932 if (vcpu->arch.guest_state_protected)
933 return;
934
935 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
936
937 if (vcpu->arch.xcr0 != host_xcr0)
938 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
939
940 if (vcpu->arch.xsaves_enabled &&
941 vcpu->arch.ia32_xss != host_xss)
942 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
943 }
944
945 if (static_cpu_has(X86_FEATURE_PKU) &&
946 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
947 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
948 vcpu->arch.pkru != vcpu->arch.host_pkru)
949 write_pkru(vcpu->arch.pkru);
950}
951EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
952
953void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
954{
955 if (vcpu->arch.guest_state_protected)
956 return;
957
958 if (static_cpu_has(X86_FEATURE_PKU) &&
959 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
960 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
961 vcpu->arch.pkru = rdpkru();
962 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
963 write_pkru(vcpu->arch.host_pkru);
964 }
965
966 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
967
968 if (vcpu->arch.xcr0 != host_xcr0)
969 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
970
971 if (vcpu->arch.xsaves_enabled &&
972 vcpu->arch.ia32_xss != host_xss)
973 wrmsrl(MSR_IA32_XSS, host_xss);
974 }
975
976}
977EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
978
979static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
980{
981 u64 xcr0 = xcr;
982 u64 old_xcr0 = vcpu->arch.xcr0;
983 u64 valid_bits;
984
985
986 if (index != XCR_XFEATURE_ENABLED_MASK)
987 return 1;
988 if (!(xcr0 & XFEATURE_MASK_FP))
989 return 1;
990 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
991 return 1;
992
993
994
995
996
997
998 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
999 if (xcr0 & ~valid_bits)
1000 return 1;
1001
1002 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
1003 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
1004 return 1;
1005
1006 if (xcr0 & XFEATURE_MASK_AVX512) {
1007 if (!(xcr0 & XFEATURE_MASK_YMM))
1008 return 1;
1009 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
1010 return 1;
1011 }
1012 vcpu->arch.xcr0 = xcr0;
1013
1014 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
1015 kvm_update_cpuid_runtime(vcpu);
1016 return 0;
1017}
1018
1019int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
1020{
1021 if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
1022 __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
1023 kvm_inject_gp(vcpu, 0);
1024 return 1;
1025 }
1026
1027 return kvm_skip_emulated_instruction(vcpu);
1028}
1029EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
1030
1031bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1032{
1033 if (cr4 & cr4_reserved_bits)
1034 return false;
1035
1036 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
1037 return false;
1038
1039 return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
1040}
1041EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
1042
1043void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
1044{
1045 if (((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS) ||
1046 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
1047 kvm_mmu_reset_context(vcpu);
1048}
1049EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
1050
1051int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1052{
1053 unsigned long old_cr4 = kvm_read_cr4(vcpu);
1054 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
1055 X86_CR4_SMEP;
1056
1057 if (!kvm_is_valid_cr4(vcpu, cr4))
1058 return 1;
1059
1060 if (is_long_mode(vcpu)) {
1061 if (!(cr4 & X86_CR4_PAE))
1062 return 1;
1063 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
1064 return 1;
1065 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
1066 && ((cr4 ^ old_cr4) & pdptr_bits)
1067 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
1068 kvm_read_cr3(vcpu)))
1069 return 1;
1070
1071 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
1072 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
1073 return 1;
1074
1075
1076 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1077 return 1;
1078 }
1079
1080 static_call(kvm_x86_set_cr4)(vcpu, cr4);
1081
1082 kvm_post_set_cr4(vcpu, old_cr4, cr4);
1083
1084 return 0;
1085}
1086EXPORT_SYMBOL_GPL(kvm_set_cr4);
1087
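/*
 * Free any cached previous MMU roots that use @pcid; if @pcid is the
 * currently active PCID, also request a sync and TLB flush of the current
 * root.
 */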
1088static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
1089{
1090 struct kvm_mmu *mmu = vcpu->arch.mmu;
1091 unsigned long roots_to_free = 0;
1092 int i;
1093
1094
1095
1096
1097
1098
1099 if (kvm_get_active_pcid(vcpu) == pcid) {
1100 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
1101 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1102 }
1103
1104 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
1105 if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
1106 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
1107
1108 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
1109}
1110
1111int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1112{
1113 bool skip_tlb_flush = false;
1114 unsigned long pcid = 0;
1115#ifdef CONFIG_X86_64
1116 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1117
1118 if (pcid_enabled) {
1119 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1120 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1121 pcid = cr3 & X86_CR3_PCID_MASK;
1122 }
1123#endif
1124
1125
1126 if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
1127 goto handle_tlb_flush;
1128
1129
1130
1131
1132
1133
1134 if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
1135 return 1;
1136
1137 if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
1138 return 1;
1139
1140 if (cr3 != kvm_read_cr3(vcpu))
1141 kvm_mmu_new_pgd(vcpu, cr3);
1142
1143 vcpu->arch.cr3 = cr3;
1144 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1145
1146handle_tlb_flush:
1147
1148
1149
1150
1151
1152
1153
1154 if (!skip_tlb_flush)
1155 kvm_invalidate_pcid(vcpu, pcid);
1156
1157 return 0;
1158}
1159EXPORT_SYMBOL_GPL(kvm_set_cr3);
1160
1161int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1162{
1163 if (cr8 & CR8_RESERVED_BITS)
1164 return 1;
1165 if (lapic_in_kernel(vcpu))
1166 kvm_lapic_set_tpr(vcpu, cr8);
1167 else
1168 vcpu->arch.cr8 = cr8;
1169 return 0;
1170}
1171EXPORT_SYMBOL_GPL(kvm_set_cr8);
1172
1173unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1174{
1175 if (lapic_in_kernel(vcpu))
1176 return kvm_lapic_get_cr8(vcpu);
1177 else
1178 return vcpu->arch.cr8;
1179}
1180EXPORT_SYMBOL_GPL(kvm_get_cr8);
1181
1182static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1183{
1184 int i;
1185
1186 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1187 for (i = 0; i < KVM_NR_DB_REGS; i++)
1188 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1189 }
1190}
1191
1192void kvm_update_dr7(struct kvm_vcpu *vcpu)
1193{
1194 unsigned long dr7;
1195
1196 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1197 dr7 = vcpu->arch.guest_debug_dr7;
1198 else
1199 dr7 = vcpu->arch.dr7;
1200 static_call(kvm_x86_set_dr7)(vcpu, dr7);
1201 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1202 if (dr7 & DR7_BP_EN_MASK)
1203 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1204}
1205EXPORT_SYMBOL_GPL(kvm_update_dr7);
1206
1207static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1208{
1209 u64 fixed = DR6_FIXED_1;
1210
1211 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1212 fixed |= DR6_RTM;
1213
1214 if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
1215 fixed |= DR6_BUS_LOCK;
1216 return fixed;
1217}
1218
1219int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1220{
1221 size_t size = ARRAY_SIZE(vcpu->arch.db);
1222
1223 switch (dr) {
1224 case 0 ... 3:
1225 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1226 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1227 vcpu->arch.eff_db[dr] = val;
1228 break;
1229 case 4:
1230 case 6:
1231 if (!kvm_dr6_valid(val))
1232 return 1;
1233 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1234 break;
1235 case 5:
1236 default:
1237 if (!kvm_dr7_valid(val))
1238 return 1;
1239 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1240 kvm_update_dr7(vcpu);
1241 break;
1242 }
1243
1244 return 0;
1245}
1246EXPORT_SYMBOL_GPL(kvm_set_dr);
1247
1248void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1249{
1250 size_t size = ARRAY_SIZE(vcpu->arch.db);
1251
1252 switch (dr) {
1253 case 0 ... 3:
1254 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1255 break;
1256 case 4:
1257 case 6:
1258 *val = vcpu->arch.dr6;
1259 break;
1260 case 5:
1261 default:
1262 *val = vcpu->arch.dr7;
1263 break;
1264 }
1265}
1266EXPORT_SYMBOL_GPL(kvm_get_dr);
1267
1268int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
1269{
1270 u32 ecx = kvm_rcx_read(vcpu);
1271 u64 data;
1272
1273 if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
1274 kvm_inject_gp(vcpu, 0);
1275 return 1;
1276 }
1277
1278 kvm_rax_write(vcpu, (u32)data);
1279 kvm_rdx_write(vcpu, data >> 32);
1280 return kvm_skip_emulated_instruction(vcpu);
1281}
1282EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
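/*
 * MSRs that KVM saves/restores for the guest and reports through
 * KVM_GET_MSR_INDEX_LIST.  The list is filtered at initialization into
 * msrs_to_save[], dropping entries the host or KVM configuration does not
 * support.
 */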
1296static const u32 msrs_to_save_all[] = {
1297 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1298 MSR_STAR,
1299#ifdef CONFIG_X86_64
1300 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1301#endif
1302 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1303 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1304 MSR_IA32_SPEC_CTRL,
1305 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1306 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1307 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1308 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1309 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1310 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1311 MSR_IA32_UMWAIT_CONTROL,
1312
1313 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1314 MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
1315 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1316 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1317 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1318 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1319 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1320 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1321 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1322 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1323 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1324 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1325 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1326 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1327 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1328 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1329 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1330 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1331 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1332 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1333 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1334 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1335
1336 MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
1337 MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
1338 MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
1339 MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
1340 MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
1341 MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
1342};
1343
1344static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1345static unsigned num_msrs_to_save;
1346
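/*
 * MSRs that KVM emulates itself and therefore exposes to userspace even if
 * the underlying hardware does not implement them (KVM paravirt, Hyper-V and
 * nested VMX MSRs, among others).  Filtered into emulated_msrs[] at init.
 */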
1347static const u32 emulated_msrs_all[] = {
1348 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1349 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1350 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1351 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1352 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1353 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1354 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1355 HV_X64_MSR_RESET,
1356 HV_X64_MSR_VP_INDEX,
1357 HV_X64_MSR_VP_RUNTIME,
1358 HV_X64_MSR_SCONTROL,
1359 HV_X64_MSR_STIMER0_CONFIG,
1360 HV_X64_MSR_VP_ASSIST_PAGE,
1361 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1362 HV_X64_MSR_TSC_EMULATION_STATUS,
1363 HV_X64_MSR_SYNDBG_OPTIONS,
1364 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1365 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1366 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1367
1368 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1369 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1370
1371 MSR_IA32_TSC_ADJUST,
1372 MSR_IA32_TSC_DEADLINE,
1373 MSR_IA32_ARCH_CAPABILITIES,
1374 MSR_IA32_PERF_CAPABILITIES,
1375 MSR_IA32_MISC_ENABLE,
1376 MSR_IA32_MCG_STATUS,
1377 MSR_IA32_MCG_CTL,
1378 MSR_IA32_MCG_EXT_CTL,
1379 MSR_IA32_SMBASE,
1380 MSR_SMI_COUNT,
1381 MSR_PLATFORM_INFO,
1382 MSR_MISC_FEATURES_ENABLES,
1383 MSR_AMD64_VIRT_SPEC_CTRL,
1384 MSR_IA32_POWER_CTL,
1385 MSR_IA32_UCODE_REV,
1386
1387
1388
1389
1390
1391
1392
1393
1394 MSR_IA32_VMX_BASIC,
1395 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1396 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1397 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1398 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1399 MSR_IA32_VMX_MISC,
1400 MSR_IA32_VMX_CR0_FIXED0,
1401 MSR_IA32_VMX_CR4_FIXED0,
1402 MSR_IA32_VMX_VMCS_ENUM,
1403 MSR_IA32_VMX_PROCBASED_CTLS2,
1404 MSR_IA32_VMX_EPT_VPID_CAP,
1405 MSR_IA32_VMX_VMFUNC,
1406
1407 MSR_K7_HWCR,
1408 MSR_KVM_POLL_CONTROL,
1409};
1410
1411static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1412static unsigned num_emulated_msrs;
1413
1414
1415
1416
1417
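/*
 * Feature MSRs: their values describe host/KVM capabilities rather than
 * guest state, and can be read by userspace via the system-scoped
 * KVM_GET_MSRS ioctl.
 */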
1418static const u32 msr_based_features_all[] = {
1419 MSR_IA32_VMX_BASIC,
1420 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1421 MSR_IA32_VMX_PINBASED_CTLS,
1422 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1423 MSR_IA32_VMX_PROCBASED_CTLS,
1424 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1425 MSR_IA32_VMX_EXIT_CTLS,
1426 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1427 MSR_IA32_VMX_ENTRY_CTLS,
1428 MSR_IA32_VMX_MISC,
1429 MSR_IA32_VMX_CR0_FIXED0,
1430 MSR_IA32_VMX_CR0_FIXED1,
1431 MSR_IA32_VMX_CR4_FIXED0,
1432 MSR_IA32_VMX_CR4_FIXED1,
1433 MSR_IA32_VMX_VMCS_ENUM,
1434 MSR_IA32_VMX_PROCBASED_CTLS2,
1435 MSR_IA32_VMX_EPT_VPID_CAP,
1436 MSR_IA32_VMX_VMFUNC,
1437
1438 MSR_F10H_DECFG,
1439 MSR_IA32_UCODE_REV,
1440 MSR_IA32_ARCH_CAPABILITIES,
1441 MSR_IA32_PERF_CAPABILITIES,
1442};
1443
1444static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1445static unsigned int num_msr_based_features;
1446
1447static u64 kvm_get_arch_capabilities(void)
1448{
1449 u64 data = 0;
1450
1451 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1452 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1453
1454
1455
1456
1457
1458
1459
1460 data |= ARCH_CAP_PSCHANGE_MC_NO;
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1472 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1473
1474 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1475 data |= ARCH_CAP_RDCL_NO;
1476 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1477 data |= ARCH_CAP_SSB_NO;
1478 if (!boot_cpu_has_bug(X86_BUG_MDS))
1479 data |= ARCH_CAP_MDS_NO;
1480
1481 if (!boot_cpu_has(X86_FEATURE_RTM)) {
1482
1483
1484
1485
1486
1487
1488
1489 data &= ~ARCH_CAP_TAA_NO;
1490 } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
1491 data |= ARCH_CAP_TAA_NO;
1492 } else {
1493
1494
1495
1496
1497
1498 }
1499
1500 return data;
1501}
1502
1503static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1504{
1505 switch (msr->index) {
1506 case MSR_IA32_ARCH_CAPABILITIES:
1507 msr->data = kvm_get_arch_capabilities();
1508 break;
1509 case MSR_IA32_UCODE_REV:
1510 rdmsrl_safe(msr->index, &msr->data);
1511 break;
1512 default:
1513 return static_call(kvm_x86_get_msr_feature)(msr);
1514 }
1515 return 0;
1516}
1517
1518static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1519{
1520 struct kvm_msr_entry msr;
1521 int r;
1522
1523 msr.index = index;
1524 r = kvm_get_msr_feature(&msr);
1525
1526 if (r == KVM_MSR_RET_INVALID) {
1527
1528 *data = 0;
1529 if (kvm_msr_ignored_check(index, 0, false))
1530 r = 0;
1531 }
1532
1533 if (r)
1534 return r;
1535
1536 *data = msr.data;
1537
1538 return 0;
1539}
1540
1541static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1542{
1543 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1544 return false;
1545
1546 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1547 return false;
1548
1549 if (efer & (EFER_LME | EFER_LMA) &&
1550 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1551 return false;
1552
1553 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1554 return false;
1555
	return true;
}

bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1560{
1561 if (efer & efer_reserved_bits)
1562 return false;
1563
1564 return __kvm_valid_efer(vcpu, efer);
1565}
1566EXPORT_SYMBOL_GPL(kvm_valid_efer);
1567
1568static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1569{
1570 u64 old_efer = vcpu->arch.efer;
1571 u64 efer = msr_info->data;
1572 int r;
1573
1574 if (efer & efer_reserved_bits)
1575 return 1;
1576
1577 if (!msr_info->host_initiated) {
1578 if (!__kvm_valid_efer(vcpu, efer))
1579 return 1;
1580
1581 if (is_paging(vcpu) &&
1582 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1583 return 1;
1584 }
1585
1586 efer &= ~EFER_LMA;
1587 efer |= vcpu->arch.efer & EFER_LMA;
1588
1589 r = static_call(kvm_x86_set_efer)(vcpu, efer);
1590 if (r) {
1591 WARN_ON(r > 0);
1592 return r;
1593 }
1594
1595
1596 if ((efer ^ old_efer) & EFER_NX)
1597 kvm_mmu_reset_context(vcpu);
1598
1599 return 0;
1600}
1601
1602void kvm_enable_efer_bits(u64 mask)
1603{
1604 efer_reserved_bits &= ~mask;
1605}
1606EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1607
1608bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
1609{
1610 struct kvm_x86_msr_filter *msr_filter;
1611 struct msr_bitmap_range *ranges;
1612 struct kvm *kvm = vcpu->kvm;
1613 bool allowed;
1614 int idx;
1615 u32 i;
1616
1617
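	/* x2APIC MSRs (0x800 - 0x8ff) are never filtered. */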
1618 if (index >= 0x800 && index <= 0x8ff)
1619 return true;
1620
1621 idx = srcu_read_lock(&kvm->srcu);
1622
1623 msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
1624 if (!msr_filter) {
1625 allowed = true;
1626 goto out;
1627 }
1628
1629 allowed = msr_filter->default_allow;
1630 ranges = msr_filter->ranges;
1631
1632 for (i = 0; i < msr_filter->count; i++) {
1633 u32 start = ranges[i].base;
1634 u32 end = start + ranges[i].nmsrs;
1635 u32 flags = ranges[i].flags;
1636 unsigned long *bitmap = ranges[i].bitmap;
1637
1638 if ((index >= start) && (index < end) && (flags & type)) {
1639 allowed = !!test_bit(index - start, bitmap);
1640 break;
1641 }
1642 }
1643
1644out:
1645 srcu_read_unlock(&kvm->srcu, idx);
1646
1647 return allowed;
1648}
1649EXPORT_SYMBOL_GPL(kvm_msr_allowed);
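/*
 * The filter consulted above is installed by userspace with the
 * KVM_X86_SET_MSR_FILTER VM ioctl.  Very roughly (a sketch, assuming the
 * uapi names from <linux/kvm.h>):
 *
 *	struct kvm_msr_filter filter = {
 *		.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
 *		.ranges[0] = {
 *			.flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
 *			.base = 0xc0000080,	// example: EFER
 *			.nmsrs = 1,
 *			.bitmap = bitmap,	// one bit per MSR, set == allowed
 *		},
 *	};
 *	ioctl(vm_fd, KVM_X86_SET_MSR_FILTER, &filter);
 */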
1650
1651
1652
1653
1654
1655
1656
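/*
 * Write @data to the MSR @index on behalf of the guest, or of the host when
 * @host_initiated.  Sanitizes architecturally required bits (canonical
 * addresses, TSC_AUX width) and applies the userspace MSR filter before
 * handing the write to the vendor implementation.  Returns 0 on success,
 * non-zero otherwise.
 */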
1657static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1658 bool host_initiated)
1659{
1660 struct msr_data msr;
1661
1662 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
1663 return KVM_MSR_RET_FILTERED;
1664
1665 switch (index) {
1666 case MSR_FS_BASE:
1667 case MSR_GS_BASE:
1668 case MSR_KERNEL_GS_BASE:
1669 case MSR_CSTAR:
1670 case MSR_LSTAR:
1671 if (is_noncanonical_address(data, vcpu))
1672 return 1;
1673 break;
1674 case MSR_IA32_SYSENTER_EIP:
1675 case MSR_IA32_SYSENTER_ESP:
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
1689 break;
1690 case MSR_TSC_AUX:
1691 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1692 return 1;
1693
1694 if (!host_initiated &&
1695 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1696 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1697 return 1;
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708 if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
1709 return 1;
1710
1711 data = (u32)data;
1712 break;
1713 }
1714
1715 msr.data = data;
1716 msr.index = index;
1717 msr.host_initiated = host_initiated;
1718
1719 return static_call(kvm_x86_set_msr)(vcpu, &msr);
1720}
1721
1722static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1723 u32 index, u64 data, bool host_initiated)
1724{
1725 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1726
1727 if (ret == KVM_MSR_RET_INVALID)
1728 if (kvm_msr_ignored_check(index, data, true))
1729 ret = 0;
1730
1731 return ret;
1732}
1733
1734
1735
1736
1737
1738
1739
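/*
 * Read the MSR @index into @data on behalf of the guest, or of the host when
 * @host_initiated.  Applies the userspace MSR filter before handing the read
 * to the vendor implementation.  Returns 0 on success, non-zero otherwise.
 */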
1740int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1741 bool host_initiated)
1742{
1743 struct msr_data msr;
1744 int ret;
1745
1746 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
1747 return KVM_MSR_RET_FILTERED;
1748
1749 switch (index) {
1750 case MSR_TSC_AUX:
1751 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1752 return 1;
1753
1754 if (!host_initiated &&
1755 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1756 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1757 return 1;
1758 break;
1759 }
1760
1761 msr.index = index;
1762 msr.host_initiated = host_initiated;
1763
1764 ret = static_call(kvm_x86_get_msr)(vcpu, &msr);
1765 if (!ret)
1766 *data = msr.data;
1767 return ret;
1768}
1769
1770static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1771 u32 index, u64 *data, bool host_initiated)
1772{
1773 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1774
1775 if (ret == KVM_MSR_RET_INVALID) {
1776
1777 *data = 0;
1778 if (kvm_msr_ignored_check(index, 0, false))
1779 ret = 0;
1780 }
1781
1782 return ret;
1783}
1784
1785int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1786{
1787 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1788}
1789EXPORT_SYMBOL_GPL(kvm_get_msr);
1790
1791int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1792{
1793 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1794}
1795EXPORT_SYMBOL_GPL(kvm_set_msr);
1796
1797static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
1798{
1799 int err = vcpu->run->msr.error;
1800 if (!err) {
1801 kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
1802 kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
1803 }
1804
1805 return static_call(kvm_x86_complete_emulated_msr)(vcpu, err);
1806}
1807
1808static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
1809{
1810 return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
1811}
1812
1813static u64 kvm_msr_reason(int r)
1814{
1815 switch (r) {
1816 case KVM_MSR_RET_INVALID:
1817 return KVM_MSR_EXIT_REASON_UNKNOWN;
1818 case KVM_MSR_RET_FILTERED:
1819 return KVM_MSR_EXIT_REASON_FILTER;
1820 default:
1821 return KVM_MSR_EXIT_REASON_INVAL;
1822 }
1823}
1824
1825static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
1826 u32 exit_reason, u64 data,
1827 int (*completion)(struct kvm_vcpu *vcpu),
1828 int r)
1829{
1830 u64 msr_reason = kvm_msr_reason(r);
1831
1832
1833 if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
1834 return 0;
1835
1836 vcpu->run->exit_reason = exit_reason;
1837 vcpu->run->msr.error = 0;
1838 memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
1839 vcpu->run->msr.reason = msr_reason;
1840 vcpu->run->msr.index = index;
1841 vcpu->run->msr.data = data;
1842 vcpu->arch.complete_userspace_io = completion;
1843
1844 return 1;
1845}
1846
1847static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
1848{
1849 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
1850 complete_emulated_rdmsr, r);
1851}
1852
1853static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
1854{
1855 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
1856 complete_emulated_wrmsr, r);
1857}
1858
1859int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1860{
1861 u32 ecx = kvm_rcx_read(vcpu);
1862 u64 data;
1863 int r;
1864
1865 r = kvm_get_msr(vcpu, ecx, &data);
1866
1867
1868 if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
1869
1870 return 0;
1871 }
1872
1873 if (!r) {
1874 trace_kvm_msr_read(ecx, data);
1875
1876 kvm_rax_write(vcpu, data & -1u);
1877 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1878 } else {
1879 trace_kvm_msr_read_ex(ecx);
1880 }
1881
1882 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1883}
1884EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1885
1886int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1887{
1888 u32 ecx = kvm_rcx_read(vcpu);
1889 u64 data = kvm_read_edx_eax(vcpu);
1890 int r;
1891
1892 r = kvm_set_msr(vcpu, ecx, data);
1893
1894
1895 if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
1896
1897 return 0;
1898
1899
1900 if (r < 0)
1901 return r;
1902
1903 if (!r)
1904 trace_kvm_msr_write(ecx, data);
1905 else
1906 trace_kvm_msr_write_ex(ecx, data);
1907
1908 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1909}
1910EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1911
1912int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
1913{
1914 return kvm_skip_emulated_instruction(vcpu);
1915}
1916EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
1917
1918int kvm_emulate_invd(struct kvm_vcpu *vcpu)
1919{
1920
1921 return kvm_emulate_as_nop(vcpu);
1922}
1923EXPORT_SYMBOL_GPL(kvm_emulate_invd);
1924
1925int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
1926{
1927 pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
1928 return kvm_emulate_as_nop(vcpu);
1929}
1930EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
1931
1932int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
1933{
1934 kvm_queue_exception(vcpu, UD_VECTOR);
1935 return 1;
1936}
1937EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
1938
1939int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
1940{
1941 pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
1942 return kvm_emulate_as_nop(vcpu);
1943}
1944EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
1945
1946static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
1947{
1948 xfer_to_guest_mode_prepare();
1949 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
1950 xfer_to_guest_mode_work_pending();
1951}
1952
1953
1954
1955
1956
1957
1958
1959
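/*
 * Fast paths for performance-critical WRMSRs that can be handled entirely
 * with interrupts disabled, without going through the full exit path:
 * x2APIC ICR writes for fixed, physical-destination, non-broadcast IPIs, and
 * TSC-deadline writes when the hypervisor timer can be used
 * (kvm_can_use_hv_timer()).
 */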
1960static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
1961{
1962 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
1963 return 1;
1964
1965 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
1966 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
1967 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
1968 ((u32)(data >> 32) != X2APIC_BROADCAST)) {
1969
1970 data &= ~(1 << 12);
1971 kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
1972 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
1973 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
1974 trace_kvm_apic_write(APIC_ICR, (u32)data);
1975 return 0;
1976 }
1977
1978 return 1;
1979}
1980
1981static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
1982{
1983 if (!kvm_can_use_hv_timer(vcpu))
1984 return 1;
1985
1986 kvm_set_lapic_tscdeadline_msr(vcpu, data);
1987 return 0;
1988}
1989
1990fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
1991{
1992 u32 msr = kvm_rcx_read(vcpu);
1993 u64 data;
1994 fastpath_t ret = EXIT_FASTPATH_NONE;
1995
1996 switch (msr) {
1997 case APIC_BASE_MSR + (APIC_ICR >> 4):
1998 data = kvm_read_edx_eax(vcpu);
1999 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
2000 kvm_skip_emulated_instruction(vcpu);
2001 ret = EXIT_FASTPATH_EXIT_HANDLED;
2002 }
2003 break;
2004 case MSR_IA32_TSC_DEADLINE:
2005 data = kvm_read_edx_eax(vcpu);
2006 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
2007 kvm_skip_emulated_instruction(vcpu);
2008 ret = EXIT_FASTPATH_REENTER_GUEST;
2009 }
2010 break;
2011 default:
2012 break;
2013 }
2014
2015 if (ret != EXIT_FASTPATH_NONE)
2016 trace_kvm_msr_write(msr, data);
2017
2018 return ret;
2019}
2020EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
2021
2022
2023
2024
2025static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2026{
2027 return kvm_get_msr_ignored_check(vcpu, index, data, true);
2028}
2029
2030static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2031{
2032 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
2033}
2034
2035#ifdef CONFIG_X86_64
2036struct pvclock_clock {
2037 int vclock_mode;
2038 u64 cycle_last;
2039 u64 mask;
2040 u32 mult;
2041 u32 shift;
2042 u64 base_cycles;
2043 u64 offset;
2044};
2045
2046struct pvclock_gtod_data {
2047 seqcount_t seq;
2048
2049 struct pvclock_clock clock;
2050 struct pvclock_clock raw_clock;
2051
2052 ktime_t offs_boot;
2053 u64 wall_time_sec;
2054};
2055
2056static struct pvclock_gtod_data pvclock_gtod_data;
2057
2058static void update_pvclock_gtod(struct timekeeper *tk)
2059{
2060 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
2061
2062 write_seqcount_begin(&vdata->seq);
2063
2064
2065 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
2066 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
2067 vdata->clock.mask = tk->tkr_mono.mask;
2068 vdata->clock.mult = tk->tkr_mono.mult;
2069 vdata->clock.shift = tk->tkr_mono.shift;
2070 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
2071 vdata->clock.offset = tk->tkr_mono.base;
2072
2073 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
2074 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
2075 vdata->raw_clock.mask = tk->tkr_raw.mask;
2076 vdata->raw_clock.mult = tk->tkr_raw.mult;
2077 vdata->raw_clock.shift = tk->tkr_raw.shift;
2078 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
2079 vdata->raw_clock.offset = tk->tkr_raw.base;
2080
2081 vdata->wall_time_sec = tk->xtime_sec;
2082
2083 vdata->offs_boot = tk->offs_boot;
2084
2085 write_seqcount_end(&vdata->seq);
2086}
2087
2088static s64 get_kvmclock_base_ns(void)
2089{
2090
2091 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
2092}
2093#else
2094static s64 get_kvmclock_base_ns(void)
2095{
2096
2097 return ktime_get_boottime_ns();
2098}
2099#endif
2100
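/*
 * Publish the wall-clock time at guest physical address @wall_clock using
 * the pvclock protocol: the version field is bumped to an odd value before
 * the update and back to an even value afterwards, so the guest can detect
 * and retry a torn read.
 */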
2101void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
2102{
2103 int version;
2104 int r;
2105 struct pvclock_wall_clock wc;
2106 u32 wc_sec_hi;
2107 u64 wall_nsec;
2108
2109 if (!wall_clock)
2110 return;
2111
2112 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
2113 if (r)
2114 return;
2115
2116 if (version & 1)
2117 ++version;
2118
2119 ++version;
2120
2121 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
2122 return;
2123
2124
2125
2126
2127
2128
2129 wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
2130
2131 wc.nsec = do_div(wall_nsec, 1000000000);
2132 wc.sec = (u32)wall_nsec;
2133 wc.version = version;
2134
2135 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
2136
2137 if (sec_hi_ofs) {
2138 wc_sec_hi = wall_nsec >> 32;
2139 kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
2140 &wc_sec_hi, sizeof(wc_sec_hi));
2141 }
2142
2143 version++;
2144 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
2145}
2146
2147static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
2148 bool old_msr, bool host_initiated)
2149{
2150 struct kvm_arch *ka = &vcpu->kvm->arch;
2151
2152 if (vcpu->vcpu_id == 0 && !host_initiated) {
2153 if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
2154 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2155
2156 ka->boot_vcpu_runs_old_kvmclock = old_msr;
2157 }
2158
2159 vcpu->arch.time = system_time;
2160 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2161
2162
2163 vcpu->arch.pv_time_enabled = false;
2164 if (!(system_time & 1))
2165 return;
2166
2167 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2168 &vcpu->arch.pv_time, system_time & ~1ULL,
2169 sizeof(struct pvclock_vcpu_time_info)))
2170 vcpu->arch.pv_time_enabled = true;
2171
2172 return;
2173}
2174
2175static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
2176{
2177 do_shl32_div32(dividend, divisor);
2178 return dividend;
2179}
2180
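/*
 * Compute a (shift, multiplier) pair such that a counter running at @base_hz
 * can be converted to one running at @scaled_hz using pvclock's fixed-point
 * arithmetic: scale the value by 2^shift, then multiply by multiplier / 2^32.
 */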
2181static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
2182 s8 *pshift, u32 *pmultiplier)
2183{
2184 uint64_t scaled64;
2185 int32_t shift = 0;
2186 uint64_t tps64;
2187 uint32_t tps32;
2188
2189 tps64 = base_hz;
2190 scaled64 = scaled_hz;
2191 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
2192 tps64 >>= 1;
2193 shift--;
2194 }
2195
2196 tps32 = (uint32_t)tps64;
2197 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
2198 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
2199 scaled64 >>= 1;
2200 else
2201 tps32 <<= 1;
2202 shift++;
2203 }
2204
2205 *pshift = shift;
2206 *pmultiplier = div_frac(scaled64, tps32);
2207}
2208
2209#ifdef CONFIG_X86_64
2210static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
2211#endif
2212
2213static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
2214static unsigned long max_tsc_khz;
2215
2216static u32 adjust_tsc_khz(u32 khz, s32 ppm)
2217{
2218 u64 v = (u64)khz * (1000000 + ppm);
2219 do_div(v, 1000000);
2220 return v;
2221}
2222
2223static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier);
2224
2225static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
2226{
2227 u64 ratio;
2228
2229
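 /* Guest TSC runs at the host frequency: use the default 1:1 ratio. */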
2230 if (!scale) {
2231 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2232 return 0;
2233 }
2234
2235
2236 if (!kvm_has_tsc_control) {
2237 if (user_tsc_khz > tsc_khz) {
2238 vcpu->arch.tsc_catchup = 1;
2239 vcpu->arch.tsc_always_catchup = 1;
2240 return 0;
2241 } else {
2242 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
2243 return -1;
2244 }
2245 }
2246
2247
2248 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
2249 user_tsc_khz, tsc_khz);
2250
2251 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
2252 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
2253 user_tsc_khz);
2254 return -1;
2255 }
2256
2257 kvm_vcpu_write_tsc_multiplier(vcpu, ratio);
2258 return 0;
2259}
2260
2261static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
2262{
2263 u32 thresh_lo, thresh_hi;
2264 int use_scaling = 0;
2265
2266
2267 if (user_tsc_khz == 0) {
2268
2269 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2270 return -1;
2271 }
2272
2273
2274 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
2275 &vcpu->arch.virtual_tsc_shift,
2276 &vcpu->arch.virtual_tsc_mult);
2277 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
2278
2279
2280
2281
2282
2283
2284
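 /*
  * If the requested rate is within tsc_tolerance_ppm of the host TSC
  * rate, run the guest at the host rate; otherwise fall back to
  * scaling (or catchup when hardware scaling is unavailable).
  */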
2285 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
2286 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
2287 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
2288 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
2289 use_scaling = 1;
2290 }
2291 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
2292}
2293
2294static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
2295{
2296 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
2297 vcpu->arch.virtual_tsc_mult,
2298 vcpu->arch.virtual_tsc_shift);
2299 tsc += vcpu->arch.this_tsc_write;
2300 return tsc;
2301}
2302
2303static inline int gtod_is_based_on_tsc(int mode)
2304{
2305 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
2306}
2307
2308static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
2309{
2310#ifdef CONFIG_X86_64
2311 bool vcpus_matched;
2312 struct kvm_arch *ka = &vcpu->kvm->arch;
2313 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2314
2315 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2316 atomic_read(&vcpu->kvm->online_vcpus));
2317
2318
2319
2320
2321
2322
2323
2324
2325
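 /*
  * Request a masterclock update if this write brings all online vCPUs
  * into the same TSC generation while the host clocksource is TSC
  * based, or if the masterclock is already in use and needs to be
  * recomputed.
  */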
2326 if (ka->use_master_clock ||
2327 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
2328 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2329
2330 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
2331 atomic_read(&vcpu->kvm->online_vcpus),
2332 ka->use_master_clock, gtod->clock.vclock_mode);
2333#endif
2334}
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
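/*
 * The TSC scaling ratio is a fixed-point value with
 * kvm_tsc_scaling_ratio_frac_bits fractional bits; for example,
 * 1ULL << kvm_tsc_scaling_ratio_frac_bits encodes a 1:1 ratio, in which
 * case __scale_tsc() returns the TSC value unchanged.
 */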
2346static inline u64 __scale_tsc(u64 ratio, u64 tsc)
2347{
2348 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
2349}
2350
2351u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio)
2352{
2353 u64 _tsc = tsc;
2354
2355 if (ratio != kvm_default_tsc_scaling_ratio)
2356 _tsc = __scale_tsc(ratio, tsc);
2357
2358 return _tsc;
2359}
2360EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2361
2362static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2363{
2364 u64 tsc;
2365
2366 tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio);
2367
2368 return target_tsc - tsc;
2369}
2370
2371u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2372{
2373 return vcpu->arch.l1_tsc_offset +
2374 kvm_scale_tsc(vcpu, host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
2375}
2376EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2377
2378u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
2379{
2380 u64 nested_offset;
2381
2382 if (l2_multiplier == kvm_default_tsc_scaling_ratio)
2383 nested_offset = l1_offset;
2384 else
2385 nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
2386 kvm_tsc_scaling_ratio_frac_bits);
2387
2388 nested_offset += l2_offset;
2389 return nested_offset;
2390}
2391EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
2392
2393u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
2394{
2395 if (l2_multiplier != kvm_default_tsc_scaling_ratio)
2396 return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
2397 kvm_tsc_scaling_ratio_frac_bits);
2398
2399 return l1_multiplier;
2400}
2401EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
2402
2403static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
2404{
2405 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2406 vcpu->arch.l1_tsc_offset,
2407 l1_offset);
2408
2409 vcpu->arch.l1_tsc_offset = l1_offset;
2410
2411
2412
2413
2414
2415
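 /*
  * arch.tsc_offset is the offset actually applied in hardware.  While
  * L2 is running it must combine L1's offset with L2's offset and
  * multiplier; otherwise it is simply the L1 offset.
  */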
2416 if (is_guest_mode(vcpu))
2417 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2418 l1_offset,
2419 static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
2420 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2421 else
2422 vcpu->arch.tsc_offset = l1_offset;
2423
2424 static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
2425}
2426
2427static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
2428{
2429 vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
2430
2431
2432 if (is_guest_mode(vcpu))
2433 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2434 l1_multiplier,
2435 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2436 else
2437 vcpu->arch.tsc_scaling_ratio = l1_multiplier;
2438
2439 if (kvm_has_tsc_control)
2440 static_call(kvm_x86_write_tsc_multiplier)(
2441 vcpu, vcpu->arch.tsc_scaling_ratio);
2442}
2443
2444static inline bool kvm_check_tsc_unstable(void)
2445{
2446#ifdef CONFIG_X86_64
2447
2448
2449
2450
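 /*
  * The TSC may be marked unstable on Hyper-V even though the
  * 'TSC page' clocksource is usable, so don't report instability in
  * that case.
  */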
2451 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2452 return false;
2453#endif
2454 return check_tsc_unstable();
2455}
2456
2457static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
2458{
2459 struct kvm *kvm = vcpu->kvm;
2460 u64 offset, ns, elapsed;
2461 unsigned long flags;
2462 bool matched;
2463 bool already_matched;
2464 bool synchronizing = false;
2465
2466 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2467 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2468 ns = get_kvmclock_base_ns();
2469 elapsed = ns - kvm->arch.last_tsc_nsec;
2470
2471 if (vcpu->arch.virtual_tsc_khz) {
2472 if (data == 0) {
2473
2474
2475
2476
2477
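 /*
  * A write of zero is treated as vCPU initialization and always
  * counts as an attempt to synchronize with the current TSC
  * generation.
  */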
2478 synchronizing = true;
2479 } else {
2480 u64 tsc_exp = kvm->arch.last_tsc_write +
2481 nsec_to_cycles(vcpu, elapsed);
2482 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2483
2484
2485
2486
2487
2488 synchronizing = data < tsc_exp + tsc_hz &&
2489 data + tsc_hz > tsc_exp;
2490 }
2491 }
2492
2493
2494
2495
2496
2497
2498
2499 if (synchronizing &&
2500 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2501 if (!kvm_check_tsc_unstable()) {
2502 offset = kvm->arch.cur_tsc_offset;
2503 } else {
2504 u64 delta = nsec_to_cycles(vcpu, elapsed);
2505 data += delta;
2506 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2507 }
2508 matched = true;
2509 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
2510 } else {
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520 kvm->arch.cur_tsc_generation++;
2521 kvm->arch.cur_tsc_nsec = ns;
2522 kvm->arch.cur_tsc_write = data;
2523 kvm->arch.cur_tsc_offset = offset;
2524 matched = false;
2525 }
2526
2527
2528
2529
2530
2531 kvm->arch.last_tsc_nsec = ns;
2532 kvm->arch.last_tsc_write = data;
2533 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2534
2535 vcpu->arch.last_guest_tsc = data;
2536
2537
2538 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2539 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2540 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2541
2542 kvm_vcpu_write_tsc_offset(vcpu, offset);
2543 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2544
2545 raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
2546 if (!matched) {
2547 kvm->arch.nr_vcpus_matched_tsc = 0;
2548 } else if (!already_matched) {
2549 kvm->arch.nr_vcpus_matched_tsc++;
2550 }
2551
2552 kvm_track_tsc_matching(vcpu);
2553 raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
2554}
2555
2556static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2557 s64 adjustment)
2558{
2559 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2560 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2561}
2562
2563static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2564{
2565 if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2566 WARN_ON(adjustment < 0);
2567 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment,
2568 vcpu->arch.l1_tsc_scaling_ratio);
2569 adjust_tsc_offset_guest(vcpu, adjustment);
2570}
2571
2572#ifdef CONFIG_X86_64
2573
2574static u64 read_tsc(void)
2575{
2576 u64 ret = (u64)rdtsc_ordered();
2577 u64 last = pvclock_gtod_data.clock.cycle_last;
2578
2579 if (likely(ret >= last))
2580 return ret;
2581
2582
2583
2584
2585
2586
2587
2588
2589
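 /*
  * rdtsc_ordered() can still be behind cycle_last as observed on
  * another CPU; returning 'last' keeps the derived clock monotonic.
  * The empty asm discourages the compiler from turning this highly
  * predictable branch into a cmov.
  */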
2590 asm volatile ("");
2591 return last;
2592}
2593
2594static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2595 int *mode)
2596{
2597 long v;
2598 u64 tsc_pg_val;
2599
2600 switch (clock->vclock_mode) {
2601 case VDSO_CLOCKMODE_HVCLOCK:
2602 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2603 tsc_timestamp);
2604 if (tsc_pg_val != U64_MAX) {
2605
2606 *mode = VDSO_CLOCKMODE_HVCLOCK;
2607 v = (tsc_pg_val - clock->cycle_last) &
2608 clock->mask;
2609 } else {
2610
2611 *mode = VDSO_CLOCKMODE_NONE;
2612 }
2613 break;
2614 case VDSO_CLOCKMODE_TSC:
2615 *mode = VDSO_CLOCKMODE_TSC;
2616 *tsc_timestamp = read_tsc();
2617 v = (*tsc_timestamp - clock->cycle_last) &
2618 clock->mask;
2619 break;
2620 default:
2621 *mode = VDSO_CLOCKMODE_NONE;
2622 }
2623
2624 if (*mode == VDSO_CLOCKMODE_NONE)
2625 *tsc_timestamp = v = 0;
2626
2627 return v * clock->mult;
2628}
2629
2630static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
2631{
2632 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2633 unsigned long seq;
2634 int mode;
2635 u64 ns;
2636
2637 do {
2638 seq = read_seqcount_begin(&gtod->seq);
2639 ns = gtod->raw_clock.base_cycles;
2640 ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
2641 ns >>= gtod->raw_clock.shift;
2642 ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
2643 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2644 *t = ns;
2645
2646 return mode;
2647}
2648
2649static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2650{
2651 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2652 unsigned long seq;
2653 int mode;
2654 u64 ns;
2655
2656 do {
2657 seq = read_seqcount_begin(&gtod->seq);
2658 ts->tv_sec = gtod->wall_time_sec;
2659 ns = gtod->clock.base_cycles;
2660 ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
2661 ns >>= gtod->clock.shift;
2662 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2663
2664 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2665 ts->tv_nsec = ns;
2666
2667 return mode;
2668}
2669
2670
2671static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2672{
2673
2674 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2675 return false;
2676
2677 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2678 tsc_timestamp));
2679}
2680
2681
2682static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2683 u64 *tsc_timestamp)
2684{
2685
2686 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2687 return false;
2688
2689 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2690}
2691#endif
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
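/*
 * Capture a (master_kernel_ns, master_cycle_now) pair for all vCPUs to use
 * as a common kvmclock reference.  The masterclock is only enabled when the
 * host clocksource is TSC based, all vCPUs have written matching TSC values,
 * no backwards TSC has been observed and the boot vCPU is not using the
 * legacy kvmclock MSR.
 */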
2734static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2735{
2736#ifdef CONFIG_X86_64
2737 struct kvm_arch *ka = &kvm->arch;
2738 int vclock_mode;
2739 bool host_tsc_clocksource, vcpus_matched;
2740
2741 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2742 atomic_read(&kvm->online_vcpus));
2743
2744
2745
2746
2747
2748 host_tsc_clocksource = kvm_get_time_and_clockread(
2749 &ka->master_kernel_ns,
2750 &ka->master_cycle_now);
2751
2752 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2753 && !ka->backwards_tsc_observed
2754 && !ka->boot_vcpu_runs_old_kvmclock;
2755
2756 if (ka->use_master_clock)
2757 atomic_set(&kvm_guest_has_master_clock, 1);
2758
2759 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2760 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2761 vcpus_matched);
2762#endif
2763}
2764
2765void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2766{
2767 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2768}
2769
2770static void kvm_gen_update_masterclock(struct kvm *kvm)
2771{
2772#ifdef CONFIG_X86_64
2773 int i;
2774 struct kvm_vcpu *vcpu;
2775 struct kvm_arch *ka = &kvm->arch;
2776 unsigned long flags;
2777
2778 kvm_hv_invalidate_tsc_page(kvm);
2779
2780 kvm_make_mclock_inprogress_request(kvm);
2781
2782
2783 raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
2784 pvclock_update_vm_gtod_copy(kvm);
2785 raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2786
2787 kvm_for_each_vcpu(i, vcpu, kvm)
2788 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2789
2790
2791 kvm_for_each_vcpu(i, vcpu, kvm)
2792 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2793#endif
2794}
2795
2796u64 get_kvmclock_ns(struct kvm *kvm)
2797{
2798 struct kvm_arch *ka = &kvm->arch;
2799 struct pvclock_vcpu_time_info hv_clock;
2800 unsigned long flags;
2801 u64 ret;
2802
2803 raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
2804 if (!ka->use_master_clock) {
2805 raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2806 return get_kvmclock_base_ns() + ka->kvmclock_offset;
2807 }
2808
2809 hv_clock.tsc_timestamp = ka->master_cycle_now;
2810 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2811 raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2812
2813
2814 get_cpu();
2815
2816 if (__this_cpu_read(cpu_tsc_khz)) {
2817 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2818 &hv_clock.tsc_shift,
2819 &hv_clock.tsc_to_system_mul);
2820 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2821 } else
2822 ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
2823
2824 put_cpu();
2825
2826 return ret;
2827}
2828
2829static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
2830 struct gfn_to_hva_cache *cache,
2831 unsigned int offset)
2832{
2833 struct kvm_vcpu_arch *vcpu = &v->arch;
2834 struct pvclock_vcpu_time_info guest_hv_clock;
2835
2836 if (unlikely(kvm_read_guest_offset_cached(v->kvm, cache,
2837 &guest_hv_clock, offset, sizeof(guest_hv_clock))))
2838 return;
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
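 /*
  * Follow the pvclock ABI: the version field is made odd before the
  * payload is updated and even again afterwards, with write barriers
  * in between, so another vCPU reading this page never sees a torn
  * copy.
  */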
2854 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2855
2856 if (guest_hv_clock.version & 1)
2857 ++guest_hv_clock.version;
2858
2859 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2860 kvm_write_guest_offset_cached(v->kvm, cache,
2861 &vcpu->hv_clock, offset,
2862 sizeof(vcpu->hv_clock.version));
2863
2864 smp_wmb();
2865
2866
2867 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2868
2869 if (vcpu->pvclock_set_guest_stopped_request) {
2870 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2871 vcpu->pvclock_set_guest_stopped_request = false;
2872 }
2873
2874 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2875
2876 kvm_write_guest_offset_cached(v->kvm, cache,
2877 &vcpu->hv_clock, offset,
2878 sizeof(vcpu->hv_clock));
2879
2880 smp_wmb();
2881
2882 vcpu->hv_clock.version++;
2883 kvm_write_guest_offset_cached(v->kvm, cache,
2884 &vcpu->hv_clock, offset,
2885 sizeof(vcpu->hv_clock.version));
2886}
2887
2888static int kvm_guest_time_update(struct kvm_vcpu *v)
2889{
2890 unsigned long flags, tgt_tsc_khz;
2891 struct kvm_vcpu_arch *vcpu = &v->arch;
2892 struct kvm_arch *ka = &v->kvm->arch;
2893 s64 kernel_ns;
2894 u64 tsc_timestamp, host_tsc;
2895 u8 pvclock_flags;
2896 bool use_master_clock;
2897
2898 kernel_ns = 0;
2899 host_tsc = 0;
2900
2901
2902
2903
2904
2905 raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
2906 use_master_clock = ka->use_master_clock;
2907 if (use_master_clock) {
2908 host_tsc = ka->master_cycle_now;
2909 kernel_ns = ka->master_kernel_ns;
2910 }
2911 raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
2912
2913
2914 local_irq_save(flags);
2915 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2916 if (unlikely(tgt_tsc_khz == 0)) {
2917 local_irq_restore(flags);
2918 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2919 return 1;
2920 }
2921 if (!use_master_clock) {
2922 host_tsc = rdtsc();
2923 kernel_ns = get_kvmclock_base_ns();
2924 }
2925
2926 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
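 /*
  * In catchup mode, move the TSC offset forward whenever the TSC value
  * implied by elapsed kvmclock time is ahead of the guest's current
  * TSC, so the guest TSC never appears to jump backwards.
  */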
2938 if (vcpu->tsc_catchup) {
2939 u64 tsc = compute_guest_tsc(v, kernel_ns);
2940 if (tsc > tsc_timestamp) {
2941 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2942 tsc_timestamp = tsc;
2943 }
2944 }
2945
2946 local_irq_restore(flags);
2947
2948
2949
2950 if (kvm_has_tsc_control)
2951 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz,
2952 v->arch.l1_tsc_scaling_ratio);
2953
2954 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2955 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2956 &vcpu->hv_clock.tsc_shift,
2957 &vcpu->hv_clock.tsc_to_system_mul);
2958 vcpu->hw_tsc_khz = tgt_tsc_khz;
2959 }
2960
2961 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2962 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2963 vcpu->last_guest_tsc = tsc_timestamp;
2964
2965
2966 pvclock_flags = 0;
2967 if (use_master_clock)
2968 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2969
2970 vcpu->hv_clock.flags = pvclock_flags;
2971
2972 if (vcpu->pv_time_enabled)
2973 kvm_setup_pvclock_page(v, &vcpu->pv_time, 0);
2974 if (vcpu->xen.vcpu_info_set)
2975 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
2976 offsetof(struct compat_vcpu_info, time));
2977 if (vcpu->xen.vcpu_time_info_set)
2978 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
2979 if (!v->vcpu_idx)
2980 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2981 return 0;
2982}
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
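/*
 * kvmclock updates triggered for a single vCPU (e.g. after migration to a
 * different physical CPU) are propagated to all vCPUs so their system_time
 * stays consistent, but via delayed work so that a burst of such updates is
 * coalesced and guests with many vCPUs are not slowed down.
 */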
2998#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2999
3000static void kvmclock_update_fn(struct work_struct *work)
3001{
3002 int i;
3003 struct delayed_work *dwork = to_delayed_work(work);
3004 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3005 kvmclock_update_work);
3006 struct kvm *kvm = container_of(ka, struct kvm, arch);
3007 struct kvm_vcpu *vcpu;
3008
3009 kvm_for_each_vcpu(i, vcpu, kvm) {
3010 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3011 kvm_vcpu_kick(vcpu);
3012 }
3013}
3014
3015static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
3016{
3017 struct kvm *kvm = v->kvm;
3018
3019 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3020 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
3021 KVMCLOCK_UPDATE_DELAY);
3022}
3023
3024#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
3025
3026static void kvmclock_sync_fn(struct work_struct *work)
3027{
3028 struct delayed_work *dwork = to_delayed_work(work);
3029 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3030 kvmclock_sync_work);
3031 struct kvm *kvm = container_of(ka, struct kvm, arch);
3032
3033 if (!kvmclock_periodic_sync)
3034 return;
3035
3036 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
3037 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
3038 KVMCLOCK_SYNC_PERIOD);
3039}
3040
3041
3042
3043
3044static bool can_set_mci_status(struct kvm_vcpu *vcpu)
3045{
3046
3047 if (guest_cpuid_is_amd_or_hygon(vcpu))
3048 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
3049
3050 return false;
3051}
3052
3053static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3054{
3055 u64 mcg_cap = vcpu->arch.mcg_cap;
3056 unsigned bank_num = mcg_cap & 0xff;
3057 u32 msr = msr_info->index;
3058 u64 data = msr_info->data;
3059
3060 switch (msr) {
3061 case MSR_IA32_MCG_STATUS:
3062 vcpu->arch.mcg_status = data;
3063 break;
3064 case MSR_IA32_MCG_CTL:
3065 if (!(mcg_cap & MCG_CTL_P) &&
3066 (data || !msr_info->host_initiated))
3067 return 1;
3068 if (data != 0 && data != ~(u64)0)
3069 return 1;
3070 vcpu->arch.mcg_ctl = data;
3071 break;
3072 default:
3073 if (msr >= MSR_IA32_MC0_CTL &&
3074 msr < MSR_IA32_MCx_CTL(bank_num)) {
3075 u32 offset = array_index_nospec(
3076 msr - MSR_IA32_MC0_CTL,
3077 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3078
3079
3080
3081
3082
3083
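 /*
  * Only 0 or all-1s may be written to IA32_MCi_CTL (offset 0 within a
  * bank); bit 10 is tolerated because some guests clear it in bank 4
  * to work around an AMD K8 erratum.
  */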
3084 if ((offset & 0x3) == 0 &&
3085 data != 0 && (data | (1 << 10)) != ~(u64)0)
3086 return -1;
3087
3088
3089 if (!msr_info->host_initiated &&
3090 (offset & 0x3) == 1 && data != 0) {
3091 if (!can_set_mci_status(vcpu))
3092 return -1;
3093 }
3094
3095 vcpu->arch.mce_banks[offset] = data;
3096 break;
3097 }
3098 return 1;
3099 }
3100 return 0;
3101}
3102
3103static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
3104{
3105 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
3106
3107 return (vcpu->arch.apf.msr_en_val & mask) == mask;
3108}
3109
3110static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
3111{
3112 gpa_t gpa = data & ~0x3f;
3113
3114
3115 if (data & 0x30)
3116 return 1;
3117
3118 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
3119 (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
3120 return 1;
3121
3122 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
3123 (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
3124 return 1;
3125
3126 if (!lapic_in_kernel(vcpu))
3127 return data ? 1 : 0;
3128
3129 vcpu->arch.apf.msr_en_val = data;
3130
3131 if (!kvm_pv_async_pf_enabled(vcpu)) {
3132 kvm_clear_async_pf_completion_queue(vcpu);
3133 kvm_async_pf_hash_reset(vcpu);
3134 return 0;
3135 }
3136
3137 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
3138 sizeof(u64)))
3139 return 1;
3140
3141 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
3142 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
3143
3144 kvm_async_pf_wakeup_all(vcpu);
3145
3146 return 0;
3147}
3148
3149static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
3150{
3151
3152 if (data >> 8)
3153 return 1;
3154
3155 if (!lapic_in_kernel(vcpu))
3156 return 1;
3157
3158 vcpu->arch.apf.msr_int_val = data;
3159
3160 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
3161
3162 return 0;
3163}
3164
3165static void kvmclock_reset(struct kvm_vcpu *vcpu)
3166{
3167 vcpu->arch.pv_time_enabled = false;
3168 vcpu->arch.time = 0;
3169}
3170
3171static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
3172{
3173 ++vcpu->stat.tlb_flush;
3174 static_call(kvm_x86_tlb_flush_all)(vcpu);
3175}
3176
3177static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
3178{
3179 ++vcpu->stat.tlb_flush;
3180
3181 if (!tdp_enabled) {
3182
3183
3184
3185
3186
3187
3188
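 /*
  * With shadow paging, a guest-initiated TLB flush also requires the
  * shadow page tables to be resynchronized; unloading the MMU forces
  * that on the next entry and flushes the TLB as a side effect.
  */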
3189 kvm_mmu_unload(vcpu);
3190 return;
3191 }
3192
3193 static_call(kvm_x86_tlb_flush_guest)(vcpu);
3194}
3195
3196static void record_steal_time(struct kvm_vcpu *vcpu)
3197{
3198 struct kvm_host_map map;
3199 struct kvm_steal_time *st;
3200
3201 if (kvm_xen_msr_enabled(vcpu->kvm)) {
3202 kvm_xen_runstate_set_running(vcpu);
3203 return;
3204 }
3205
3206 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3207 return;
3208
3209
3210 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
3211 &map, &vcpu->arch.st.cache, false))
3212 return;
3213
3214 st = map.hva +
3215 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
3216
3217
3218
3219
3220
3221 if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
3222 u8 st_preempted = xchg(&st->preempted, 0);
3223
3224 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
3225 st_preempted & KVM_VCPU_FLUSH_TLB);
3226 if (st_preempted & KVM_VCPU_FLUSH_TLB)
3227 kvm_vcpu_flush_tlb_guest(vcpu);
3228 } else {
3229 st->preempted = 0;
3230 }
3231
3232 vcpu->arch.st.preempted = 0;
3233
3234 if (st->version & 1)
3235 st->version += 1;
3236
3237 st->version += 1;
3238
3239 smp_wmb();
3240
3241 st->steal += current->sched_info.run_delay -
3242 vcpu->arch.st.last_steal;
3243 vcpu->arch.st.last_steal = current->sched_info.run_delay;
3244
3245 smp_wmb();
3246
3247 st->version += 1;
3248
3249 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
3250}
3251
3252int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3253{
3254 bool pr = false;
3255 u32 msr = msr_info->index;
3256 u64 data = msr_info->data;
3257
3258 if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
3259 return kvm_xen_write_hypercall_page(vcpu, data);
3260
3261 switch (msr) {
3262 case MSR_AMD64_NB_CFG:
3263 case MSR_IA32_UCODE_WRITE:
3264 case MSR_VM_HSAVE_PA:
3265 case MSR_AMD64_PATCH_LOADER:
3266 case MSR_AMD64_BU_CFG2:
3267 case MSR_AMD64_DC_CFG:
3268 case MSR_F15H_EX_CFG:
3269 break;
3270
3271 case MSR_IA32_UCODE_REV:
3272 if (msr_info->host_initiated)
3273 vcpu->arch.microcode_version = data;
3274 break;
3275 case MSR_IA32_ARCH_CAPABILITIES:
3276 if (!msr_info->host_initiated)
3277 return 1;
3278 vcpu->arch.arch_capabilities = data;
3279 break;
3280 case MSR_IA32_PERF_CAPABILITIES: {
3281 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
3282
3283 if (!msr_info->host_initiated)
3284 return 1;
3285 if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
3286 return 1;
3287 if (data & ~msr_ent.data)
3288 return 1;
3289
3290 vcpu->arch.perf_capabilities = data;
3291
3292 return 0;
3293 }
3294 case MSR_EFER:
3295 return set_efer(vcpu, msr_info);
3296 case MSR_K7_HWCR:
3297 data &= ~(u64)0x40;
3298 data &= ~(u64)0x100;
3299 data &= ~(u64)0x8;
3300
3301
3302 if (data == BIT_ULL(18)) {
3303 vcpu->arch.msr_hwcr = data;
3304 } else if (data != 0) {
3305 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
3306 data);
3307 return 1;
3308 }
3309 break;
3310 case MSR_FAM10H_MMIO_CONF_BASE:
3311 if (data != 0) {
3312 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
3313 "0x%llx\n", data);
3314 return 1;
3315 }
3316 break;
3317 case 0x200 ... 0x2ff:
3318 return kvm_mtrr_set_msr(vcpu, msr, data);
3319 case MSR_IA32_APICBASE:
3320 return kvm_set_apic_base(vcpu, msr_info);
3321 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3322 return kvm_x2apic_msr_write(vcpu, msr, data);
3323 case MSR_IA32_TSC_DEADLINE:
3324 kvm_set_lapic_tscdeadline_msr(vcpu, data);
3325 break;
3326 case MSR_IA32_TSC_ADJUST:
3327 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
3328 if (!msr_info->host_initiated) {
3329 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
3330 adjust_tsc_offset_guest(vcpu, adj);
3331
3332
3333
3334 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3335 }
3336 vcpu->arch.ia32_tsc_adjust_msr = data;
3337 }
3338 break;
3339 case MSR_IA32_MISC_ENABLE:
3340 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
3341 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
3342 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
3343 return 1;
3344 vcpu->arch.ia32_misc_enable_msr = data;
3345 kvm_update_cpuid_runtime(vcpu);
3346 } else {
3347 vcpu->arch.ia32_misc_enable_msr = data;
3348 }
3349 break;
3350 case MSR_IA32_SMBASE:
3351 if (!msr_info->host_initiated)
3352 return 1;
3353 vcpu->arch.smbase = data;
3354 break;
3355 case MSR_IA32_POWER_CTL:
3356 vcpu->arch.msr_ia32_power_ctl = data;
3357 break;
3358 case MSR_IA32_TSC:
3359 if (msr_info->host_initiated) {
3360 kvm_synchronize_tsc(vcpu, data);
3361 } else {
3362 u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
3363 adjust_tsc_offset_guest(vcpu, adj);
3364 vcpu->arch.ia32_tsc_adjust_msr += adj;
3365 }
3366 break;
3367 case MSR_IA32_XSS:
3368 if (!msr_info->host_initiated &&
3369 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3370 return 1;
3371
3372
3373
3374
3375
3376 if (data & ~supported_xss)
3377 return 1;
3378 vcpu->arch.ia32_xss = data;
3379 break;
3380 case MSR_SMI_COUNT:
3381 if (!msr_info->host_initiated)
3382 return 1;
3383 vcpu->arch.smi_count = data;
3384 break;
3385 case MSR_KVM_WALL_CLOCK_NEW:
3386 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3387 return 1;
3388
3389 vcpu->kvm->arch.wall_clock = data;
3390 kvm_write_wall_clock(vcpu->kvm, data, 0);
3391 break;
3392 case MSR_KVM_WALL_CLOCK:
3393 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3394 return 1;
3395
3396 vcpu->kvm->arch.wall_clock = data;
3397 kvm_write_wall_clock(vcpu->kvm, data, 0);
3398 break;
3399 case MSR_KVM_SYSTEM_TIME_NEW:
3400 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3401 return 1;
3402
3403 kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
3404 break;
3405 case MSR_KVM_SYSTEM_TIME:
3406 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3407 return 1;
3408
3409 kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
3410 break;
3411 case MSR_KVM_ASYNC_PF_EN:
3412 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3413 return 1;
3414
3415 if (kvm_pv_enable_async_pf(vcpu, data))
3416 return 1;
3417 break;
3418 case MSR_KVM_ASYNC_PF_INT:
3419 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3420 return 1;
3421
3422 if (kvm_pv_enable_async_pf_int(vcpu, data))
3423 return 1;
3424 break;
3425 case MSR_KVM_ASYNC_PF_ACK:
3426 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3427 return 1;
3428 if (data & 0x1) {
3429 vcpu->arch.apf.pageready_pending = false;
3430 kvm_check_async_pf_completion(vcpu);
3431 }
3432 break;
3433 case MSR_KVM_STEAL_TIME:
3434 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3435 return 1;
3436
3437 if (unlikely(!sched_info_on()))
3438 return 1;
3439
3440 if (data & KVM_STEAL_RESERVED_MASK)
3441 return 1;
3442
3443 vcpu->arch.st.msr_val = data;
3444
3445 if (!(data & KVM_MSR_ENABLED))
3446 break;
3447
3448 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3449
3450 break;
3451 case MSR_KVM_PV_EOI_EN:
3452 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3453 return 1;
3454
3455 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
3456 return 1;
3457 break;
3458
3459 case MSR_KVM_POLL_CONTROL:
3460 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3461 return 1;
3462
3463
3464 if (data & (-1ULL << 1))
3465 return 1;
3466
3467 vcpu->arch.msr_kvm_poll_control = data;
3468 break;
3469
3470 case MSR_IA32_MCG_CTL:
3471 case MSR_IA32_MCG_STATUS:
3472 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3473 return set_msr_mce(vcpu, msr_info);
3474
3475 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3476 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3477 pr = true;
3478 fallthrough;
3479 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3480 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3481 if (kvm_pmu_is_valid_msr(vcpu, msr))
3482 return kvm_pmu_set_msr(vcpu, msr_info);
3483
3484 if (pr || data != 0)
3485 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3486 "0x%x data 0x%llx\n", msr, data);
3487 break;
3488 case MSR_K7_CLK_CTL:
3489
3490
3491
3492
3493
3494
3495
3496
3497 break;
3498 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3499 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3500 case HV_X64_MSR_SYNDBG_OPTIONS:
3501 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3502 case HV_X64_MSR_CRASH_CTL:
3503 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3504 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3505 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3506 case HV_X64_MSR_TSC_EMULATION_STATUS:
3507 return kvm_hv_set_msr_common(vcpu, msr, data,
3508 msr_info->host_initiated);
3509 case MSR_IA32_BBL_CR_CTL3:
3510
3511
3512
3513 if (report_ignored_msrs)
3514 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3515 msr, data);
3516 break;
3517 case MSR_AMD64_OSVW_ID_LENGTH:
3518 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3519 return 1;
3520 vcpu->arch.osvw.length = data;
3521 break;
3522 case MSR_AMD64_OSVW_STATUS:
3523 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3524 return 1;
3525 vcpu->arch.osvw.status = data;
3526 break;
3527 case MSR_PLATFORM_INFO:
3528 if (!msr_info->host_initiated ||
3529 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3530 cpuid_fault_enabled(vcpu)))
3531 return 1;
3532 vcpu->arch.msr_platform_info = data;
3533 break;
3534 case MSR_MISC_FEATURES_ENABLES:
3535 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3536 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3537 !supports_cpuid_fault(vcpu)))
3538 return 1;
3539 vcpu->arch.msr_misc_features_enables = data;
3540 break;
3541 default:
3542 if (kvm_pmu_is_valid_msr(vcpu, msr))
3543 return kvm_pmu_set_msr(vcpu, msr_info);
3544 return KVM_MSR_RET_INVALID;
3545 }
3546 return 0;
3547}
3548EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3549
3550static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3551{
3552 u64 data;
3553 u64 mcg_cap = vcpu->arch.mcg_cap;
3554 unsigned bank_num = mcg_cap & 0xff;
3555
3556 switch (msr) {
3557 case MSR_IA32_P5_MC_ADDR:
3558 case MSR_IA32_P5_MC_TYPE:
3559 data = 0;
3560 break;
3561 case MSR_IA32_MCG_CAP:
3562 data = vcpu->arch.mcg_cap;
3563 break;
3564 case MSR_IA32_MCG_CTL:
3565 if (!(mcg_cap & MCG_CTL_P) && !host)
3566 return 1;
3567 data = vcpu->arch.mcg_ctl;
3568 break;
3569 case MSR_IA32_MCG_STATUS:
3570 data = vcpu->arch.mcg_status;
3571 break;
3572 default:
3573 if (msr >= MSR_IA32_MC0_CTL &&
3574 msr < MSR_IA32_MCx_CTL(bank_num)) {
3575 u32 offset = array_index_nospec(
3576 msr - MSR_IA32_MC0_CTL,
3577 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3578
3579 data = vcpu->arch.mce_banks[offset];
3580 break;
3581 }
3582 return 1;
3583 }
3584 *pdata = data;
3585 return 0;
3586}
3587
3588int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3589{
3590 switch (msr_info->index) {
3591 case MSR_IA32_PLATFORM_ID:
3592 case MSR_IA32_EBL_CR_POWERON:
3593 case MSR_IA32_LASTBRANCHFROMIP:
3594 case MSR_IA32_LASTBRANCHTOIP:
3595 case MSR_IA32_LASTINTFROMIP:
3596 case MSR_IA32_LASTINTTOIP:
3597 case MSR_AMD64_SYSCFG:
3598 case MSR_K8_TSEG_ADDR:
3599 case MSR_K8_TSEG_MASK:
3600 case MSR_VM_HSAVE_PA:
3601 case MSR_K8_INT_PENDING_MSG:
3602 case MSR_AMD64_NB_CFG:
3603 case MSR_FAM10H_MMIO_CONF_BASE:
3604 case MSR_AMD64_BU_CFG2:
3605 case MSR_IA32_PERF_CTL:
3606 case MSR_AMD64_DC_CFG:
3607 case MSR_F15H_EX_CFG:
3608
3609
3610
3611
3612
3613
3614 case MSR_RAPL_POWER_UNIT:
3615 case MSR_PP0_ENERGY_STATUS:
3616 case MSR_PP1_ENERGY_STATUS:
3617 case MSR_PKG_ENERGY_STATUS:
3618 case MSR_DRAM_ENERGY_STATUS:
3619 msr_info->data = 0;
3620 break;
3621 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3622 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3623 return kvm_pmu_get_msr(vcpu, msr_info);
3624 if (!msr_info->host_initiated)
3625 return 1;
3626 msr_info->data = 0;
3627 break;
3628 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3629 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3630 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3631 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3632 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3633 return kvm_pmu_get_msr(vcpu, msr_info);
3634 msr_info->data = 0;
3635 break;
3636 case MSR_IA32_UCODE_REV:
3637 msr_info->data = vcpu->arch.microcode_version;
3638 break;
3639 case MSR_IA32_ARCH_CAPABILITIES:
3640 if (!msr_info->host_initiated &&
3641 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3642 return 1;
3643 msr_info->data = vcpu->arch.arch_capabilities;
3644 break;
3645 case MSR_IA32_PERF_CAPABILITIES:
3646 if (!msr_info->host_initiated &&
3647 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3648 return 1;
3649 msr_info->data = vcpu->arch.perf_capabilities;
3650 break;
3651 case MSR_IA32_POWER_CTL:
3652 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3653 break;
3654 case MSR_IA32_TSC: {
3655
3656
3657
3658
3659
3660
3661
3662
3663
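 /*
  * Host-initiated reads return L1's view of the TSC so migration
  * behaves consistently; guest reads use the currently active offset
  * and scaling ratio, which reflect L2 while nested guest mode is
  * active.
  */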
3664 u64 offset, ratio;
3665
3666 if (msr_info->host_initiated) {
3667 offset = vcpu->arch.l1_tsc_offset;
3668 ratio = vcpu->arch.l1_tsc_scaling_ratio;
3669 } else {
3670 offset = vcpu->arch.tsc_offset;
3671 ratio = vcpu->arch.tsc_scaling_ratio;
3672 }
3673
3674 msr_info->data = kvm_scale_tsc(vcpu, rdtsc(), ratio) + offset;
3675 break;
3676 }
3677 case MSR_MTRRcap:
3678 case 0x200 ... 0x2ff:
3679 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3680 case 0xcd:
3681 msr_info->data = 3;
3682 break;
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694 case MSR_EBC_FREQUENCY_ID:
3695 msr_info->data = 1 << 24;
3696 break;
3697 case MSR_IA32_APICBASE:
3698 msr_info->data = kvm_get_apic_base(vcpu);
3699 break;
3700 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3701 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3702 case MSR_IA32_TSC_DEADLINE:
3703 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3704 break;
3705 case MSR_IA32_TSC_ADJUST:
3706 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3707 break;
3708 case MSR_IA32_MISC_ENABLE:
3709 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3710 break;
3711 case MSR_IA32_SMBASE:
3712 if (!msr_info->host_initiated)
3713 return 1;
3714 msr_info->data = vcpu->arch.smbase;
3715 break;
3716 case MSR_SMI_COUNT:
3717 msr_info->data = vcpu->arch.smi_count;
3718 break;
3719 case MSR_IA32_PERF_STATUS:
3720
3721 msr_info->data = 1000ULL;
3722
3723 msr_info->data |= (((uint64_t)4ULL) << 40);
3724 break;
3725 case MSR_EFER:
3726 msr_info->data = vcpu->arch.efer;
3727 break;
3728 case MSR_KVM_WALL_CLOCK:
3729 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3730 return 1;
3731
3732 msr_info->data = vcpu->kvm->arch.wall_clock;
3733 break;
3734 case MSR_KVM_WALL_CLOCK_NEW:
3735 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3736 return 1;
3737
3738 msr_info->data = vcpu->kvm->arch.wall_clock;
3739 break;
3740 case MSR_KVM_SYSTEM_TIME:
3741 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3742 return 1;
3743
3744 msr_info->data = vcpu->arch.time;
3745 break;
3746 case MSR_KVM_SYSTEM_TIME_NEW:
3747 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3748 return 1;
3749
3750 msr_info->data = vcpu->arch.time;
3751 break;
3752 case MSR_KVM_ASYNC_PF_EN:
3753 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3754 return 1;
3755
3756 msr_info->data = vcpu->arch.apf.msr_en_val;
3757 break;
3758 case MSR_KVM_ASYNC_PF_INT:
3759 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3760 return 1;
3761
3762 msr_info->data = vcpu->arch.apf.msr_int_val;
3763 break;
3764 case MSR_KVM_ASYNC_PF_ACK:
3765 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3766 return 1;
3767
3768 msr_info->data = 0;
3769 break;
3770 case MSR_KVM_STEAL_TIME:
3771 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3772 return 1;
3773
3774 msr_info->data = vcpu->arch.st.msr_val;
3775 break;
3776 case MSR_KVM_PV_EOI_EN:
3777 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3778 return 1;
3779
3780 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3781 break;
3782 case MSR_KVM_POLL_CONTROL:
3783 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3784 return 1;
3785
3786 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3787 break;
3788 case MSR_IA32_P5_MC_ADDR:
3789 case MSR_IA32_P5_MC_TYPE:
3790 case MSR_IA32_MCG_CAP:
3791 case MSR_IA32_MCG_CTL:
3792 case MSR_IA32_MCG_STATUS:
3793 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3794 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3795 msr_info->host_initiated);
3796 case MSR_IA32_XSS:
3797 if (!msr_info->host_initiated &&
3798 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3799 return 1;
3800 msr_info->data = vcpu->arch.ia32_xss;
3801 break;
3802 case MSR_K7_CLK_CTL:
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812 msr_info->data = 0x20000000;
3813 break;
3814 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3815 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3816 case HV_X64_MSR_SYNDBG_OPTIONS:
3817 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3818 case HV_X64_MSR_CRASH_CTL:
3819 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3820 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3821 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3822 case HV_X64_MSR_TSC_EMULATION_STATUS:
3823 return kvm_hv_get_msr_common(vcpu,
3824 msr_info->index, &msr_info->data,
3825 msr_info->host_initiated);
3826 case MSR_IA32_BBL_CR_CTL3:
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837 msr_info->data = 0xbe702111;
3838 break;
3839 case MSR_AMD64_OSVW_ID_LENGTH:
3840 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3841 return 1;
3842 msr_info->data = vcpu->arch.osvw.length;
3843 break;
3844 case MSR_AMD64_OSVW_STATUS:
3845 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3846 return 1;
3847 msr_info->data = vcpu->arch.osvw.status;
3848 break;
3849 case MSR_PLATFORM_INFO:
3850 if (!msr_info->host_initiated &&
3851 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3852 return 1;
3853 msr_info->data = vcpu->arch.msr_platform_info;
3854 break;
3855 case MSR_MISC_FEATURES_ENABLES:
3856 msr_info->data = vcpu->arch.msr_misc_features_enables;
3857 break;
3858 case MSR_K7_HWCR:
3859 msr_info->data = vcpu->arch.msr_hwcr;
3860 break;
3861 default:
3862 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3863 return kvm_pmu_get_msr(vcpu, msr_info);
3864 return KVM_MSR_RET_INVALID;
3865 }
3866 return 0;
3867}
3868EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3869
3870
3871
3872
3873
3874
3875static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
3876 struct kvm_msr_entry *entries,
3877 int (*do_msr)(struct kvm_vcpu *vcpu,
3878 unsigned index, u64 *data))
3879{
3880 int i;
3881
3882 for (i = 0; i < msrs->nmsrs; ++i)
3883 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3884 break;
3885
3886 return i;
3887}
3888
3889
3890
3891
3892
3893
3894static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
3895 int (*do_msr)(struct kvm_vcpu *vcpu,
3896 unsigned index, u64 *data),
3897 int writeback)
3898{
3899 struct kvm_msrs msrs;
3900 struct kvm_msr_entry *entries;
3901 int r, n;
3902 unsigned size;
3903
3904 r = -EFAULT;
3905 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3906 goto out;
3907
3908 r = -E2BIG;
3909 if (msrs.nmsrs >= MAX_IO_MSRS)
3910 goto out;
3911
3912 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3913 entries = memdup_user(user_msrs->entries, size);
3914 if (IS_ERR(entries)) {
3915 r = PTR_ERR(entries);
3916 goto out;
3917 }
3918
3919 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3920 if (r < 0)
3921 goto out_free;
3922
3923 r = -EFAULT;
3924 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3925 goto out_free;
3926
3927 r = n;
3928
3929out_free:
3930 kfree(entries);
3931out:
3932 return r;
3933}
3934
3935static inline bool kvm_can_mwait_in_guest(void)
3936{
3937 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3938 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3939 boot_cpu_has(X86_FEATURE_ARAT);
3940}
3941
3942static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
3943 struct kvm_cpuid2 __user *cpuid_arg)
3944{
3945 struct kvm_cpuid2 cpuid;
3946 int r;
3947
3948 r = -EFAULT;
3949 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3950 return r;
3951
3952 r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3953 if (r)
3954 return r;
3955
3956 r = -EFAULT;
3957 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3958 return r;
3959
3960 return 0;
3961}
3962
3963int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3964{
3965 int r = 0;
3966
3967 switch (ext) {
3968 case KVM_CAP_IRQCHIP:
3969 case KVM_CAP_HLT:
3970 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3971 case KVM_CAP_SET_TSS_ADDR:
3972 case KVM_CAP_EXT_CPUID:
3973 case KVM_CAP_EXT_EMUL_CPUID:
3974 case KVM_CAP_CLOCKSOURCE:
3975 case KVM_CAP_PIT:
3976 case KVM_CAP_NOP_IO_DELAY:
3977 case KVM_CAP_MP_STATE:
3978 case KVM_CAP_SYNC_MMU:
3979 case KVM_CAP_USER_NMI:
3980 case KVM_CAP_REINJECT_CONTROL:
3981 case KVM_CAP_IRQ_INJECT_STATUS:
3982 case KVM_CAP_IOEVENTFD:
3983 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3984 case KVM_CAP_PIT2:
3985 case KVM_CAP_PIT_STATE2:
3986 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3987 case KVM_CAP_VCPU_EVENTS:
3988 case KVM_CAP_HYPERV:
3989 case KVM_CAP_HYPERV_VAPIC:
3990 case KVM_CAP_HYPERV_SPIN:
3991 case KVM_CAP_HYPERV_SYNIC:
3992 case KVM_CAP_HYPERV_SYNIC2:
3993 case KVM_CAP_HYPERV_VP_INDEX:
3994 case KVM_CAP_HYPERV_EVENTFD:
3995 case KVM_CAP_HYPERV_TLBFLUSH:
3996 case KVM_CAP_HYPERV_SEND_IPI:
3997 case KVM_CAP_HYPERV_CPUID:
3998 case KVM_CAP_HYPERV_ENFORCE_CPUID:
3999 case KVM_CAP_SYS_HYPERV_CPUID:
4000 case KVM_CAP_PCI_SEGMENT:
4001 case KVM_CAP_DEBUGREGS:
4002 case KVM_CAP_X86_ROBUST_SINGLESTEP:
4003 case KVM_CAP_XSAVE:
4004 case KVM_CAP_ASYNC_PF:
4005 case KVM_CAP_ASYNC_PF_INT:
4006 case KVM_CAP_GET_TSC_KHZ:
4007 case KVM_CAP_KVMCLOCK_CTRL:
4008 case KVM_CAP_READONLY_MEM:
4009 case KVM_CAP_HYPERV_TIME:
4010 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
4011 case KVM_CAP_TSC_DEADLINE_TIMER:
4012 case KVM_CAP_DISABLE_QUIRKS:
4013 case KVM_CAP_SET_BOOT_CPU_ID:
4014 case KVM_CAP_SPLIT_IRQCHIP:
4015 case KVM_CAP_IMMEDIATE_EXIT:
4016 case KVM_CAP_PMU_EVENT_FILTER:
4017 case KVM_CAP_GET_MSR_FEATURES:
4018 case KVM_CAP_MSR_PLATFORM_INFO:
4019 case KVM_CAP_EXCEPTION_PAYLOAD:
4020 case KVM_CAP_SET_GUEST_DEBUG:
4021 case KVM_CAP_LAST_CPU:
4022 case KVM_CAP_X86_USER_SPACE_MSR:
4023 case KVM_CAP_X86_MSR_FILTER:
4024 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4025#ifdef CONFIG_X86_SGX_KVM
4026 case KVM_CAP_SGX_ATTRIBUTE:
4027#endif
4028 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
4029 case KVM_CAP_SREGS2:
4030 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
4031 r = 1;
4032 break;
4033 case KVM_CAP_EXIT_HYPERCALL:
4034 r = KVM_EXIT_HYPERCALL_VALID_MASK;
4035 break;
4036 case KVM_CAP_SET_GUEST_DEBUG2:
4037 return KVM_GUESTDBG_VALID_MASK;
4038#ifdef CONFIG_KVM_XEN
4039 case KVM_CAP_XEN_HVM:
4040 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
4041 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
4042 KVM_XEN_HVM_CONFIG_SHARED_INFO;
4043 if (sched_info_on())
4044 r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
4045 break;
4046#endif
4047 case KVM_CAP_SYNC_REGS:
4048 r = KVM_SYNC_X86_VALID_FIELDS;
4049 break;
4050 case KVM_CAP_ADJUST_CLOCK:
4051 r = KVM_CLOCK_TSC_STABLE;
4052 break;
4053 case KVM_CAP_X86_DISABLE_EXITS:
4054 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
4055 KVM_X86_DISABLE_EXITS_CSTATE;
4056 if (kvm_can_mwait_in_guest())
4057 r |= KVM_X86_DISABLE_EXITS_MWAIT;
4058 break;
4059 case KVM_CAP_X86_SMM:
4060
4061
4062
4063
4064
4065
4066
4067
4068 r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE);
4069 break;
4070 case KVM_CAP_VAPIC:
4071 r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
4072 break;
4073 case KVM_CAP_NR_VCPUS:
4074 r = KVM_SOFT_MAX_VCPUS;
4075 break;
4076 case KVM_CAP_MAX_VCPUS:
4077 r = KVM_MAX_VCPUS;
4078 break;
4079 case KVM_CAP_MAX_VCPU_ID:
4080 r = KVM_MAX_VCPU_ID;
4081 break;
4082 case KVM_CAP_PV_MMU:
4083 r = 0;
4084 break;
4085 case KVM_CAP_MCE:
4086 r = KVM_MAX_MCE_BANKS;
4087 break;
4088 case KVM_CAP_XCRS:
4089 r = boot_cpu_has(X86_FEATURE_XSAVE);
4090 break;
4091 case KVM_CAP_TSC_CONTROL:
4092 r = kvm_has_tsc_control;
4093 break;
4094 case KVM_CAP_X2APIC_API:
4095 r = KVM_X2APIC_API_VALID_FLAGS;
4096 break;
4097 case KVM_CAP_NESTED_STATE:
4098 r = kvm_x86_ops.nested_ops->get_state ?
4099 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
4100 break;
4101 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4102 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
4103 break;
4104 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4105 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
4106 break;
4107 case KVM_CAP_SMALLER_MAXPHYADDR:
4108 r = (int) allow_smaller_maxphyaddr;
4109 break;
4110 case KVM_CAP_STEAL_TIME:
4111 r = sched_info_on();
4112 break;
4113 case KVM_CAP_X86_BUS_LOCK_EXIT:
4114 if (kvm_has_bus_lock_exit)
4115 r = KVM_BUS_LOCK_DETECTION_OFF |
4116 KVM_BUS_LOCK_DETECTION_EXIT;
4117 else
4118 r = 0;
4119 break;
4120 default:
4121 break;
4122 }
4123 return r;
4124
4125}
4126
4127long kvm_arch_dev_ioctl(struct file *filp,
4128 unsigned int ioctl, unsigned long arg)
4129{
4130 void __user *argp = (void __user *)arg;
4131 long r;
4132
4133 switch (ioctl) {
4134 case KVM_GET_MSR_INDEX_LIST: {
4135 struct kvm_msr_list __user *user_msr_list = argp;
4136 struct kvm_msr_list msr_list;
4137 unsigned n;
4138
4139 r = -EFAULT;
4140 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4141 goto out;
4142 n = msr_list.nmsrs;
4143 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
4144 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4145 goto out;
4146 r = -E2BIG;
4147 if (n < msr_list.nmsrs)
4148 goto out;
4149 r = -EFAULT;
4150 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
4151 num_msrs_to_save * sizeof(u32)))
4152 goto out;
4153 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
4154 &emulated_msrs,
4155 num_emulated_msrs * sizeof(u32)))
4156 goto out;
4157 r = 0;
4158 break;
4159 }
4160 case KVM_GET_SUPPORTED_CPUID:
4161 case KVM_GET_EMULATED_CPUID: {
4162 struct kvm_cpuid2 __user *cpuid_arg = argp;
4163 struct kvm_cpuid2 cpuid;
4164
4165 r = -EFAULT;
4166 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4167 goto out;
4168
4169 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
4170 ioctl);
4171 if (r)
4172 goto out;
4173
4174 r = -EFAULT;
4175 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4176 goto out;
4177 r = 0;
4178 break;
4179 }
4180 case KVM_X86_GET_MCE_CAP_SUPPORTED:
4181 r = -EFAULT;
4182 if (copy_to_user(argp, &kvm_mce_cap_supported,
4183 sizeof(kvm_mce_cap_supported)))
4184 goto out;
4185 r = 0;
4186 break;
4187 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
4188 struct kvm_msr_list __user *user_msr_list = argp;
4189 struct kvm_msr_list msr_list;
4190 unsigned int n;
4191
4192 r = -EFAULT;
4193 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4194 goto out;
4195 n = msr_list.nmsrs;
4196 msr_list.nmsrs = num_msr_based_features;
4197 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4198 goto out;
4199 r = -E2BIG;
4200 if (n < msr_list.nmsrs)
4201 goto out;
4202 r = -EFAULT;
4203 if (copy_to_user(user_msr_list->indices, &msr_based_features,
4204 num_msr_based_features * sizeof(u32)))
4205 goto out;
4206 r = 0;
4207 break;
4208 }
4209 case KVM_GET_MSRS:
4210 r = msr_io(NULL, argp, do_get_msr_feature, 1);
4211 break;
4212 case KVM_GET_SUPPORTED_HV_CPUID:
4213 r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
4214 break;
4215 default:
4216 r = -EINVAL;
4217 break;
4218 }
4219out:
4220 return r;
4221}
4222
4223static void wbinvd_ipi(void *garbage)
4224{
4225 wbinvd();
4226}
4227
4228static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
4229{
4230 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
4231}
4232
4233void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4234{
4235
4236 if (need_emulate_wbinvd(vcpu)) {
4237 if (static_call(kvm_x86_has_wbinvd_exit)())
4238 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4239 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
4240 smp_call_function_single(vcpu->cpu,
4241 wbinvd_ipi, NULL, 1);
4242 }
4243
4244 static_call(kvm_x86_vcpu_load)(vcpu, cpu);
4245
4246
4247 vcpu->arch.host_pkru = read_pkru();
4248
4249
4250 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
4251 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
4252 vcpu->arch.tsc_offset_adjustment = 0;
4253 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4254 }
4255
4256 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
4257 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
4258 rdtsc() - vcpu->arch.last_host_tsc;
4259 if (tsc_delta < 0)
4260 mark_tsc_unstable("KVM discovered backwards TSC");
4261
4262 if (kvm_check_tsc_unstable()) {
4263 u64 offset = kvm_compute_l1_tsc_offset(vcpu,
4264 vcpu->arch.last_guest_tsc);
4265 kvm_vcpu_write_tsc_offset(vcpu, offset);
4266 vcpu->arch.tsc_catchup = 1;
4267 }
4268
4269 if (kvm_lapic_hv_timer_in_use(vcpu))
4270 kvm_lapic_restart_hv_timer(vcpu);
4271
4272
4273
4274
4275
4276 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
4277 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
4278 if (vcpu->cpu != cpu)
4279 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
4280 vcpu->cpu = cpu;
4281 }
4282
4283 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
4284}
4285
4286static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
4287{
4288 struct kvm_host_map map;
4289 struct kvm_steal_time *st;
4290
4291 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
4292 return;
4293
4294 if (vcpu->arch.st.preempted)
4295 return;
4296
4297 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
4298 &vcpu->arch.st.cache, true))
4299 return;
4300
4301 st = map.hva +
4302 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
4303
4304 st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
4305
4306 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
4307}
4308
4309void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
4310{
4311 int idx;
4312
4313 if (vcpu->preempted && !vcpu->arch.guest_state_protected)
4314 vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
4315
4316
4317
4318
4319
4320 idx = srcu_read_lock(&vcpu->kvm->srcu);
4321 if (kvm_xen_msr_enabled(vcpu->kvm))
4322 kvm_xen_runstate_set_preempted(vcpu);
4323 else
4324 kvm_steal_time_set_preempted(vcpu);
4325 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4326
4327 static_call(kvm_x86_vcpu_put)(vcpu);
4328 vcpu->arch.last_host_tsc = rdtsc();
4329}
4330
4331static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
4332 struct kvm_lapic_state *s)
4333{
4334 if (vcpu->arch.apicv_active)
4335 static_call(kvm_x86_sync_pir_to_irr)(vcpu);
4336
4337 return kvm_apic_get_state(vcpu, s);
4338}
4339
4340static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
4341 struct kvm_lapic_state *s)
4342{
4343 int r;
4344
4345 r = kvm_apic_set_state(vcpu, s);
4346 if (r)
4347 return r;
4348 update_cr8_intercept(vcpu);
4349
4350 return 0;
4351}
4352
4353static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
4354{
4355
4356
4357
4358
4359
4360
4361 if (kvm_cpu_has_extint(vcpu))
4362 return false;
4363
4364
4365 return (!lapic_in_kernel(vcpu) ||
4366 kvm_apic_accept_pic_intr(vcpu));
4367}
4368
4369static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
4370{
4371
4372
4373
4374
4375
4376
4377
4378 return (kvm_arch_interrupt_allowed(vcpu) &&
4379 kvm_cpu_accept_dm_intr(vcpu) &&
4380 !kvm_event_needs_reinjection(vcpu) &&
4381 !vcpu->arch.exception.pending);
4382}
4383
4384static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
4385 struct kvm_interrupt *irq)
4386{
4387 if (irq->irq >= KVM_NR_INTERRUPTS)
4388 return -EINVAL;
4389
4390 if (!irqchip_in_kernel(vcpu->kvm)) {
4391 kvm_queue_interrupt(vcpu, irq->irq, false);
4392 kvm_make_request(KVM_REQ_EVENT, vcpu);
4393 return 0;
4394 }
4395
4396
4397
4398
4399
4400 if (pic_in_kernel(vcpu->kvm))
4401 return -ENXIO;
4402
4403 if (vcpu->arch.pending_external_vector != -1)
4404 return -EEXIST;
4405
4406 vcpu->arch.pending_external_vector = irq->irq;
4407 kvm_make_request(KVM_REQ_EVENT, vcpu);
4408 return 0;
4409}
4410
4411static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
4412{
4413 kvm_inject_nmi(vcpu);
4414
4415 return 0;
4416}
4417
4418static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
4419{
4420 kvm_make_request(KVM_REQ_SMI, vcpu);
4421
4422 return 0;
4423}
4424
4425static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
4426 struct kvm_tpr_access_ctl *tac)
4427{
4428 if (tac->flags)
4429 return -EINVAL;
4430 vcpu->arch.tpr_access_reporting = !!tac->enabled;
4431 return 0;
4432}
4433
4434static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
4435 u64 mcg_cap)
4436{
4437 int r;
4438 unsigned bank_num = mcg_cap & 0xff, bank;
4439
4440 r = -EINVAL;
4441 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
4442 goto out;
4443 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
4444 goto out;
4445 r = 0;
4446 vcpu->arch.mcg_cap = mcg_cap;
4447
4448 if (mcg_cap & MCG_CTL_P)
4449 vcpu->arch.mcg_ctl = ~(u64)0;
4450
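 /*
  * Each bank is four MSR-sized slots (CTL, STATUS, ADDR, MISC); setting
  * MCi_CTL to all 1s enables logging of every error type in the bank.
  */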
4451 for (bank = 0; bank < bank_num; bank++)
4452 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
4453
4454 static_call(kvm_x86_setup_mce)(vcpu);
4455out:
4456 return r;
4457}
4458
4459static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
4460 struct kvm_x86_mce *mce)
4461{
4462 u64 mcg_cap = vcpu->arch.mcg_cap;
4463 unsigned bank_num = mcg_cap & 0xff;
4464 u64 *banks = vcpu->arch.mce_banks;
4465
4466 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
4467 return -EINVAL;
4468
4469 /*
4470 * Uncorrected error reporting is disabled if MCG_CTL is not all 1s.
4471 */
4472 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
4473 vcpu->arch.mcg_ctl != ~(u64)0)
4474 return 0;
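 /* Each bank occupies four u64 slots: CTL, STATUS, ADDR, MISC. */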
4475 banks += 4 * mce->bank;
4476
4477 /*
4478 * Likewise it is disabled for this bank if MCi_CTL is not all 1s.
4479 */
4480 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
4481 return 0;
4482 if (mce->status & MCI_STATUS_UC) {
4483 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
4484 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
4485 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
4486 return 0;
4487 }
4488 if (banks[1] & MCI_STATUS_VAL)
4489 mce->status |= MCI_STATUS_OVER;
4490 banks[2] = mce->addr;
4491 banks[3] = mce->misc;
4492 vcpu->arch.mcg_status = mce->mcg_status;
4493 banks[1] = mce->status;
4494 kvm_queue_exception(vcpu, MC_VECTOR);
4495 } else if (!(banks[1] & MCI_STATUS_VAL)
4496 || !(banks[1] & MCI_STATUS_UC)) {
4497 if (banks[1] & MCI_STATUS_VAL)
4498 mce->status |= MCI_STATUS_OVER;
4499 banks[2] = mce->addr;
4500 banks[3] = mce->misc;
4501 banks[1] = mce->status;
4502 } else
4503 banks[1] |= MCI_STATUS_OVER;
4504 return 0;
4505}
4506
4507static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
4508 struct kvm_vcpu_events *events)
4509{
4510 process_nmi(vcpu);
4511
4512 if (kvm_check_request(KVM_REQ_SMI, vcpu))
4513 process_smi(vcpu);
4514
4515 /*
4516 * Only one exception can be migrated through this API.  When the
4517 * exception-payload capability is not enabled, userspace has no way to
4518 * see a pending payload (the faulting address of a #PF, the DR6 bits
4519 * of a #DB), so deliver it now, i.e. write it into CR2/DR6, before
4520 * reporting the exception state.  When the capability is enabled the
4521 * payload is reported separately and delivery stays deferred, which
4522 * lets an L1 hypervisor intercept #PF or #DB before CR2 or DR6 is
4523 * modified.
4524 */
4526 if (!vcpu->kvm->arch.exception_payload_enabled &&
4527 vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
4528 kvm_deliver_exception_payload(vcpu);
4529
4530 /*
4531 * The API does not convey the instruction length of a software
4532 * exception, so do not report soft exceptions.  As long as the guest
4533 * RIP has not been advanced, the same exception will simply be
4534 * encountered again.
4535 */
4536 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
4537 events->exception.injected = 0;
4538 events->exception.pending = 0;
4539 } else {
4540 events->exception.injected = vcpu->arch.exception.injected;
4541 events->exception.pending = vcpu->arch.exception.pending;
4542
4543 /*
4544 * For ABI compatibility, deliberately conflate injected and
4545 * pending exceptions when the payload capability is not enabled.
4546 */
4547 if (!vcpu->kvm->arch.exception_payload_enabled)
4548 events->exception.injected |=
4549 vcpu->arch.exception.pending;
4550 }
4551 events->exception.nr = vcpu->arch.exception.nr;
4552 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
4553 events->exception.error_code = vcpu->arch.exception.error_code;
4554 events->exception_has_payload = vcpu->arch.exception.has_payload;
4555 events->exception_payload = vcpu->arch.exception.payload;
4556
4557 events->interrupt.injected =
4558 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
4559 events->interrupt.nr = vcpu->arch.interrupt.nr;
4560 events->interrupt.soft = 0;
4561 events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
4562
4563 events->nmi.injected = vcpu->arch.nmi_injected;
4564 events->nmi.pending = vcpu->arch.nmi_pending != 0;
4565 events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
4566 events->nmi.pad = 0;
4567
4568 events->sipi_vector = 0;
4569
4570 events->smi.smm = is_smm(vcpu);
4571 events->smi.pending = vcpu->arch.smi_pending;
4572 events->smi.smm_inside_nmi =
4573 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
4574 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
4575
4576 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
4577 | KVM_VCPUEVENT_VALID_SHADOW
4578 | KVM_VCPUEVENT_VALID_SMM);
4579 if (vcpu->kvm->arch.exception_payload_enabled)
4580 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
4581
4582 memset(&events->reserved, 0, sizeof(events->reserved));
4583}
4584
4585static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
4586
4587static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
4588 struct kvm_vcpu_events *events)
4589{
4590 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
4591 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
4592 | KVM_VCPUEVENT_VALID_SHADOW
4593 | KVM_VCPUEVENT_VALID_SMM
4594 | KVM_VCPUEVENT_VALID_PAYLOAD))
4595 return -EINVAL;
4596
4597 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
4598 if (!vcpu->kvm->arch.exception_payload_enabled)
4599 return -EINVAL;
4600 if (events->exception.pending)
4601 events->exception.injected = 0;
4602 else
4603 events->exception_has_payload = 0;
4604 } else {
4605 events->exception.pending = 0;
4606 events->exception_has_payload = 0;
4607 }
4608
4609 if ((events->exception.injected || events->exception.pending) &&
4610 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
4611 return -EINVAL;
4612
4613 /* INITs are latched while the vCPU is in SMM. */
4614 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
4615 (events->smi.smm || events->smi.pending) &&
4616 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4617 return -EINVAL;
4618
4619 process_nmi(vcpu);
4620 vcpu->arch.exception.injected = events->exception.injected;
4621 vcpu->arch.exception.pending = events->exception.pending;
4622 vcpu->arch.exception.nr = events->exception.nr;
4623 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
4624 vcpu->arch.exception.error_code = events->exception.error_code;
4625 vcpu->arch.exception.has_payload = events->exception_has_payload;
4626 vcpu->arch.exception.payload = events->exception_payload;
4627
4628 vcpu->arch.interrupt.injected = events->interrupt.injected;
4629 vcpu->arch.interrupt.nr = events->interrupt.nr;
4630 vcpu->arch.interrupt.soft = events->interrupt.soft;
4631 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
4632 static_call(kvm_x86_set_interrupt_shadow)(vcpu,
4633 events->interrupt.shadow);
4634
4635 vcpu->arch.nmi_injected = events->nmi.injected;
4636 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
4637 vcpu->arch.nmi_pending = events->nmi.pending;
4638 static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
4639
4640 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
4641 lapic_in_kernel(vcpu))
4642 vcpu->arch.apic->sipi_vector = events->sipi_vector;
4643
4644 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
4645 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
4646 kvm_smm_changed(vcpu, events->smi.smm);
4647
4648 vcpu->arch.smi_pending = events->smi.pending;
4649
4650 if (events->smi.smm) {
4651 if (events->smi.smm_inside_nmi)
4652 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
4653 else
4654 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
4655 }
4656
4657 if (lapic_in_kernel(vcpu)) {
4658 if (events->smi.latched_init)
4659 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4660 else
4661 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4662 }
4663 }
4664
4665 kvm_make_request(KVM_REQ_EVENT, vcpu);
4666
4667 return 0;
4668}
4669
4670static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
4671 struct kvm_debugregs *dbgregs)
4672{
4673 unsigned long val;
4674
4675 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
4676 kvm_get_dr(vcpu, 6, &val);
4677 dbgregs->dr6 = val;
4678 dbgregs->dr7 = vcpu->arch.dr7;
4679 dbgregs->flags = 0;
4680 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
4681}
4682
4683static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
4684 struct kvm_debugregs *dbgregs)
4685{
4686 if (dbgregs->flags)
4687 return -EINVAL;
4688
4689 if (!kvm_dr6_valid(dbgregs->dr6))
4690 return -EINVAL;
4691 if (!kvm_dr7_valid(dbgregs->dr7))
4692 return -EINVAL;
4693
4694 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
4695 kvm_update_dr0123(vcpu);
4696 vcpu->arch.dr6 = dbgregs->dr6;
4697 vcpu->arch.dr7 = dbgregs->dr7;
4698 kvm_update_dr7(vcpu);
4699
4700 return 0;
4701}
4702
4703#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
4704
4705static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
4706{
4707 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
4708 u64 xstate_bv = xsave->header.xfeatures;
4709 u64 valid;
4710
4711 /*
4712 * Copy the legacy FP/SSE region as one block; it always sits at
4713 * offset 0, which keeps xfeatures 0 and 1 out of the loop below.
4714 */
4715 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
4716
4717 /* Set XSTATE_BV, masked to the features the guest may actually use. */
4718 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
4719 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
4720
4721 /*
4722 * Copy each remaining feature from its (possibly compacted) offset in
4723 * the guest_fpu area to its non-compacted architectural offset.
4724 */
4725 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4726 while (valid) {
4727 u32 size, offset, ecx, edx;
4728 u64 xfeature_mask = valid & -valid;
4729 int xfeature_nr = fls64(xfeature_mask) - 1;
4730 void *src;
4731
4732 cpuid_count(XSTATE_CPUID, xfeature_nr,
4733 &size, &offset, &ecx, &edx);
4734
4735 if (xfeature_nr == XFEATURE_PKRU) {
4736 memcpy(dest + offset, &vcpu->arch.pkru,
4737 sizeof(vcpu->arch.pkru));
4738 } else {
4739 src = get_xsave_addr(xsave, xfeature_nr);
4740 if (src)
4741 memcpy(dest + offset, src, size);
4742 }
4743
4744 valid -= xfeature_mask;
4745 }
4746}
4747
4748static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
4749{
4750 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
4751 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
4752 u64 valid;
4753
4754 /*
4755 * Copy the legacy FP/SSE region first; it sits at offset 0 in both
4756 * the userspace buffer and the kernel's xsave area.
4757 */
4758 memcpy(xsave, src, XSAVE_HDR_OFFSET);
4759
4760 /* Set XSTATE_BV and, when XSAVES is in use, XCOMP_BV. */
4761 xsave->header.xfeatures = xstate_bv;
4762 if (boot_cpu_has(X86_FEATURE_XSAVES))
4763 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
4764
4765 /*
4766 * Copy each feature from its non-compacted offset in the userspace
4767 * buffer to its (possibly compacted) offset in the guest_fpu area.
4768 */
4769 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4770 while (valid) {
4771 u32 size, offset, ecx, edx;
4772 u64 xfeature_mask = valid & -valid;
4773 int xfeature_nr = fls64(xfeature_mask) - 1;
4774
4775 cpuid_count(XSTATE_CPUID, xfeature_nr,
4776 &size, &offset, &ecx, &edx);
4777
4778 if (xfeature_nr == XFEATURE_PKRU) {
4779 memcpy(&vcpu->arch.pkru, src + offset,
4780 sizeof(vcpu->arch.pkru));
4781 } else {
4782 void *dest = get_xsave_addr(xsave, xfeature_nr);
4783
4784 if (dest)
4785 memcpy(dest, src + offset, size);
4786 }
4787
4788 valid -= xfeature_mask;
4789 }
4790}
4791
4792static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
4793 struct kvm_xsave *guest_xsave)
4794{
4795 if (!vcpu->arch.guest_fpu)
4796 return;
4797
4798 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4799 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
4800 fill_xsave((u8 *) guest_xsave->region, vcpu);
4801 } else {
4802 memcpy(guest_xsave->region,
4803 &vcpu->arch.guest_fpu->state.fxsave,
4804 sizeof(struct fxregs_state));
4805 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
4806 XFEATURE_MASK_FPSSE;
4807 }
4808}
4809
4810#define XSAVE_MXCSR_OFFSET 24
4811
4812static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
4813 struct kvm_xsave *guest_xsave)
4814{
4815 u64 xstate_bv;
4816 u32 mxcsr;
4817
4818 if (!vcpu->arch.guest_fpu)
4819 return 0;
4820
4821 xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
4822 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
4823
4824 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4825 /*
4826 * Reject feature bits that KVM cannot expose to the guest and MXCSR
4827 * bits the host does not support before touching the guest FPU
4828 * state.
4829 */
4830 if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
4831 return -EINVAL;
4832 load_xsave(vcpu, (u8 *)guest_xsave->region);
4833 } else {
4834 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
4835 mxcsr & ~mxcsr_feature_mask)
4836 return -EINVAL;
4837 memcpy(&vcpu->arch.guest_fpu->state.fxsave,
4838 guest_xsave->region, sizeof(struct fxregs_state));
4839 }
4840 return 0;
4841}
4842
4843static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
4844 struct kvm_xcrs *guest_xcrs)
4845{
4846 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
4847 guest_xcrs->nr_xcrs = 0;
4848 return;
4849 }
4850
4851 guest_xcrs->nr_xcrs = 1;
4852 guest_xcrs->flags = 0;
4853 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
4854 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
4855}
4856
4857static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
4858 struct kvm_xcrs *guest_xcrs)
4859{
4860 int i, r = 0;
4861
4862 if (!boot_cpu_has(X86_FEATURE_XSAVE))
4863 return -EINVAL;
4864
4865 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
4866 return -EINVAL;
4867
4868 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
4869 /* Only XCR0 is supported at present. */
4870 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
4871 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
4872 guest_xcrs->xcrs[i].value);
4873 break;
4874 }
4875 if (r)
4876 r = -EINVAL;
4877 return r;
4878}
4879
4880
4881 /*
4882 * Tell the guest kernel, via the PVCLOCK_GUEST_STOPPED flag in its
4883 * pvclock area, that it was stopped by the host.  Fails with -EINVAL
4884 * if the vCPU has not enabled a pvclock area.
4885 */
4886static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
4887{
4888 if (!vcpu->arch.pv_time_enabled)
4889 return -EINVAL;
4890 vcpu->arch.pvclock_set_guest_stopped_request = true;
4891 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4892 return 0;
4893}
4894
4895static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4896 struct kvm_enable_cap *cap)
4897{
4898 int r;
4899 uint16_t vmcs_version;
4900 void __user *user_ptr;
4901
4902 if (cap->flags)
4903 return -EINVAL;
4904
4905 switch (cap->cap) {
4906 case KVM_CAP_HYPERV_SYNIC2:
4907 if (cap->args[0])
4908 return -EINVAL;
4909 fallthrough;
4910
4911 case KVM_CAP_HYPERV_SYNIC:
4912 if (!irqchip_in_kernel(vcpu->kvm))
4913 return -EINVAL;
4914 return kvm_hv_activate_synic(vcpu, cap->cap ==
4915 KVM_CAP_HYPERV_SYNIC2);
4916 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4917 if (!kvm_x86_ops.nested_ops->enable_evmcs)
4918 return -ENOTTY;
4919 r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
4920 if (!r) {
4921 user_ptr = (void __user *)(uintptr_t)cap->args[0];
4922 if (copy_to_user(user_ptr, &vmcs_version,
4923 sizeof(vmcs_version)))
4924 r = -EFAULT;
4925 }
4926 return r;
4927 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4928 if (!kvm_x86_ops.enable_direct_tlbflush)
4929 return -ENOTTY;
4930
4931 return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
4932
4933 case KVM_CAP_HYPERV_ENFORCE_CPUID:
4934 return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
4935
4936 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4937 vcpu->arch.pv_cpuid.enforce = cap->args[0];
4938 if (vcpu->arch.pv_cpuid.enforce)
4939 kvm_update_pv_runtime(vcpu);
4940
4941 return 0;
4942 default:
4943 return -EINVAL;
4944 }
4945}
4946
4947long kvm_arch_vcpu_ioctl(struct file *filp,
4948 unsigned int ioctl, unsigned long arg)
4949{
4950 struct kvm_vcpu *vcpu = filp->private_data;
4951 void __user *argp = (void __user *)arg;
4952 int r;
4953 union {
4954 struct kvm_sregs2 *sregs2;
4955 struct kvm_lapic_state *lapic;
4956 struct kvm_xsave *xsave;
4957 struct kvm_xcrs *xcrs;
4958 void *buffer;
4959 } u;
4960
4961 vcpu_load(vcpu);
4962
4963 u.buffer = NULL;
4964 switch (ioctl) {
4965 case KVM_GET_LAPIC: {
4966 r = -EINVAL;
4967 if (!lapic_in_kernel(vcpu))
4968 goto out;
4969 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
4970 GFP_KERNEL_ACCOUNT);
4971
4972 r = -ENOMEM;
4973 if (!u.lapic)
4974 goto out;
4975 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
4976 if (r)
4977 goto out;
4978 r = -EFAULT;
4979 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
4980 goto out;
4981 r = 0;
4982 break;
4983 }
4984 case KVM_SET_LAPIC: {
4985 r = -EINVAL;
4986 if (!lapic_in_kernel(vcpu))
4987 goto out;
4988 u.lapic = memdup_user(argp, sizeof(*u.lapic));
4989 if (IS_ERR(u.lapic)) {
4990 r = PTR_ERR(u.lapic);
4991 goto out_nofree;
4992 }
4993
4994 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
4995 break;
4996 }
4997 case KVM_INTERRUPT: {
4998 struct kvm_interrupt irq;
4999
5000 r = -EFAULT;
5001 if (copy_from_user(&irq, argp, sizeof(irq)))
5002 goto out;
5003 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
5004 break;
5005 }
5006 case KVM_NMI: {
5007 r = kvm_vcpu_ioctl_nmi(vcpu);
5008 break;
5009 }
5010 case KVM_SMI: {
5011 r = kvm_vcpu_ioctl_smi(vcpu);
5012 break;
5013 }
5014 case KVM_SET_CPUID: {
5015 struct kvm_cpuid __user *cpuid_arg = argp;
5016 struct kvm_cpuid cpuid;
5017
5018 r = -EFAULT;
5019 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5020 goto out;
5021 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
5022 break;
5023 }
5024 case KVM_SET_CPUID2: {
5025 struct kvm_cpuid2 __user *cpuid_arg = argp;
5026 struct kvm_cpuid2 cpuid;
5027
5028 r = -EFAULT;
5029 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5030 goto out;
5031 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
5032 cpuid_arg->entries);
5033 break;
5034 }
5035 case KVM_GET_CPUID2: {
5036 struct kvm_cpuid2 __user *cpuid_arg = argp;
5037 struct kvm_cpuid2 cpuid;
5038
5039 r = -EFAULT;
5040 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5041 goto out;
5042 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
5043 cpuid_arg->entries);
5044 if (r)
5045 goto out;
5046 r = -EFAULT;
5047 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
5048 goto out;
5049 r = 0;
5050 break;
5051 }
5052 case KVM_GET_MSRS: {
5053 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5054 r = msr_io(vcpu, argp, do_get_msr, 1);
5055 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5056 break;
5057 }
5058 case KVM_SET_MSRS: {
5059 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5060 r = msr_io(vcpu, argp, do_set_msr, 0);
5061 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5062 break;
5063 }
5064 case KVM_TPR_ACCESS_REPORTING: {
5065 struct kvm_tpr_access_ctl tac;
5066
5067 r = -EFAULT;
5068 if (copy_from_user(&tac, argp, sizeof(tac)))
5069 goto out;
5070 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
5071 if (r)
5072 goto out;
5073 r = -EFAULT;
5074 if (copy_to_user(argp, &tac, sizeof(tac)))
5075 goto out;
5076 r = 0;
5077 break;
5078 }
5079 case KVM_SET_VAPIC_ADDR: {
5080 struct kvm_vapic_addr va;
5081 int idx;
5082
5083 r = -EINVAL;
5084 if (!lapic_in_kernel(vcpu))
5085 goto out;
5086 r = -EFAULT;
5087 if (copy_from_user(&va, argp, sizeof(va)))
5088 goto out;
5089 idx = srcu_read_lock(&vcpu->kvm->srcu);
5090 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
5091 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5092 break;
5093 }
5094 case KVM_X86_SETUP_MCE: {
5095 u64 mcg_cap;
5096
5097 r = -EFAULT;
5098 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
5099 goto out;
5100 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
5101 break;
5102 }
5103 case KVM_X86_SET_MCE: {
5104 struct kvm_x86_mce mce;
5105
5106 r = -EFAULT;
5107 if (copy_from_user(&mce, argp, sizeof(mce)))
5108 goto out;
5109 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
5110 break;
5111 }
5112 case KVM_GET_VCPU_EVENTS: {
5113 struct kvm_vcpu_events events;
5114
5115 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
5116
5117 r = -EFAULT;
5118 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
5119 break;
5120 r = 0;
5121 break;
5122 }
5123 case KVM_SET_VCPU_EVENTS: {
5124 struct kvm_vcpu_events events;
5125
5126 r = -EFAULT;
5127 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
5128 break;
5129
5130 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
5131 break;
5132 }
5133 case KVM_GET_DEBUGREGS: {
5134 struct kvm_debugregs dbgregs;
5135
5136 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
5137
5138 r = -EFAULT;
5139 if (copy_to_user(argp, &dbgregs,
5140 sizeof(struct kvm_debugregs)))
5141 break;
5142 r = 0;
5143 break;
5144 }
5145 case KVM_SET_DEBUGREGS: {
5146 struct kvm_debugregs dbgregs;
5147
5148 r = -EFAULT;
5149 if (copy_from_user(&dbgregs, argp,
5150 sizeof(struct kvm_debugregs)))
5151 break;
5152
5153 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
5154 break;
5155 }
5156 case KVM_GET_XSAVE: {
5157 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
5158 r = -ENOMEM;
5159 if (!u.xsave)
5160 break;
5161
5162 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
5163
5164 r = -EFAULT;
5165 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
5166 break;
5167 r = 0;
5168 break;
5169 }
5170 case KVM_SET_XSAVE: {
5171 u.xsave = memdup_user(argp, sizeof(*u.xsave));
5172 if (IS_ERR(u.xsave)) {
5173 r = PTR_ERR(u.xsave);
5174 goto out_nofree;
5175 }
5176
5177 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
5178 break;
5179 }
5180 case KVM_GET_XCRS: {
5181 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
5182 r = -ENOMEM;
5183 if (!u.xcrs)
5184 break;
5185
5186 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
5187
5188 r = -EFAULT;
5189 if (copy_to_user(argp, u.xcrs,
5190 sizeof(struct kvm_xcrs)))
5191 break;
5192 r = 0;
5193 break;
5194 }
5195 case KVM_SET_XCRS: {
5196 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
5197 if (IS_ERR(u.xcrs)) {
5198 r = PTR_ERR(u.xcrs);
5199 goto out_nofree;
5200 }
5201
5202 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
5203 break;
5204 }
5205 case KVM_SET_TSC_KHZ: {
5206 u32 user_tsc_khz;
5207
5208 r = -EINVAL;
5209 user_tsc_khz = (u32)arg;
5210
5211 if (kvm_has_tsc_control &&
5212 user_tsc_khz >= kvm_max_guest_tsc_khz)
5213 goto out;
5214
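 /* A requested frequency of 0 selects the host's TSC frequency. */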
5215 if (user_tsc_khz == 0)
5216 user_tsc_khz = tsc_khz;
5217
5218 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
5219 r = 0;
5220
5221 goto out;
5222 }
5223 case KVM_GET_TSC_KHZ: {
5224 r = vcpu->arch.virtual_tsc_khz;
5225 goto out;
5226 }
5227 case KVM_KVMCLOCK_CTRL: {
5228 r = kvm_set_guest_paused(vcpu);
5229 goto out;
5230 }
5231 case KVM_ENABLE_CAP: {
5232 struct kvm_enable_cap cap;
5233
5234 r = -EFAULT;
5235 if (copy_from_user(&cap, argp, sizeof(cap)))
5236 goto out;
5237 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5238 break;
5239 }
5240 case KVM_GET_NESTED_STATE: {
5241 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5242 u32 user_data_size;
5243
5244 r = -EINVAL;
5245 if (!kvm_x86_ops.nested_ops->get_state)
5246 break;
5247
5248 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
5249 r = -EFAULT;
5250 if (get_user(user_data_size, &user_kvm_nested_state->size))
5251 break;
5252
5253 r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
5254 user_data_size);
5255 if (r < 0)
5256 break;
5257
5258 if (r > user_data_size) {
5259 if (put_user(r, &user_kvm_nested_state->size))
5260 r = -EFAULT;
5261 else
5262 r = -E2BIG;
5263 break;
5264 }
5265
5266 r = 0;
5267 break;
5268 }
5269 case KVM_SET_NESTED_STATE: {
5270 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5271 struct kvm_nested_state kvm_state;
5272 int idx;
5273
5274 r = -EINVAL;
5275 if (!kvm_x86_ops.nested_ops->set_state)
5276 break;
5277
5278 r = -EFAULT;
5279 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
5280 break;
5281
5282 r = -EINVAL;
5283 if (kvm_state.size < sizeof(kvm_state))
5284 break;
5285
5286 if (kvm_state.flags &
5287 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
5288 | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
5289 | KVM_STATE_NESTED_GIF_SET))
5290 break;
5291
5292 /* nested_run_pending implies guest_mode. */
5293 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
5294 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
5295 break;
5296
5297 idx = srcu_read_lock(&vcpu->kvm->srcu);
5298 r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
5299 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5300 break;
5301 }
5302 case KVM_GET_SUPPORTED_HV_CPUID:
5303 r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
5304 break;
5305#ifdef CONFIG_KVM_XEN
5306 case KVM_XEN_VCPU_GET_ATTR: {
5307 struct kvm_xen_vcpu_attr xva;
5308
5309 r = -EFAULT;
5310 if (copy_from_user(&xva, argp, sizeof(xva)))
5311 goto out;
5312 r = kvm_xen_vcpu_get_attr(vcpu, &xva);
5313 if (!r && copy_to_user(argp, &xva, sizeof(xva)))
5314 r = -EFAULT;
5315 break;
5316 }
5317 case KVM_XEN_VCPU_SET_ATTR: {
5318 struct kvm_xen_vcpu_attr xva;
5319
5320 r = -EFAULT;
5321 if (copy_from_user(&xva, argp, sizeof(xva)))
5322 goto out;
5323 r = kvm_xen_vcpu_set_attr(vcpu, &xva);
5324 break;
5325 }
5326#endif
5327 case KVM_GET_SREGS2: {
5328 u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
5329 r = -ENOMEM;
5330 if (!u.sregs2)
5331 goto out;
5332 __get_sregs2(vcpu, u.sregs2);
5333 r = -EFAULT;
5334 if (copy_to_user(argp, u.sregs2, sizeof(struct kvm_sregs2)))
5335 goto out;
5336 r = 0;
5337 break;
5338 }
5339 case KVM_SET_SREGS2: {
5340 u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
5341 if (IS_ERR(u.sregs2)) {
5342 r = PTR_ERR(u.sregs2);
5343 u.sregs2 = NULL;
5344 goto out;
5345 }
5346 r = __set_sregs2(vcpu, u.sregs2);
5347 break;
5348 }
5349 default:
5350 r = -EINVAL;
5351 }
5352out:
5353 kfree(u.buffer);
5354out_nofree:
5355 vcpu_put(vcpu);
5356 return r;
5357}
5358
5359vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5360{
5361 return VM_FAULT_SIGBUS;
5362}
5363
5364static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
5365{
5366 int ret;
5367
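 /*
  * The private TSS region spans three pages, so the base address must
  * leave room for them below the 4 GiB boundary.
  */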
5368 if (addr > (unsigned int)(-3 * PAGE_SIZE))
5369 return -EINVAL;
5370 ret = static_call(kvm_x86_set_tss_addr)(kvm, addr);
5371 return ret;
5372}
5373
5374static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
5375 u64 ident_addr)
5376{
5377 return static_call(kvm_x86_set_identity_map_addr)(kvm, ident_addr);
5378}
5379
5380static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
5381 unsigned long kvm_nr_mmu_pages)
5382{
5383 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
5384 return -EINVAL;
5385
5386 mutex_lock(&kvm->slots_lock);
5387
5388 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
5389 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
5390
5391 mutex_unlock(&kvm->slots_lock);
5392 return 0;
5393}
5394
5395static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
5396{
5397 return kvm->arch.n_max_mmu_pages;
5398}
5399
5400static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5401{
5402 struct kvm_pic *pic = kvm->arch.vpic;
5403 int r;
5404
5405 r = 0;
5406 switch (chip->chip_id) {
5407 case KVM_IRQCHIP_PIC_MASTER:
5408 memcpy(&chip->chip.pic, &pic->pics[0],
5409 sizeof(struct kvm_pic_state));
5410 break;
5411 case KVM_IRQCHIP_PIC_SLAVE:
5412 memcpy(&chip->chip.pic, &pic->pics[1],
5413 sizeof(struct kvm_pic_state));
5414 break;
5415 case KVM_IRQCHIP_IOAPIC:
5416 kvm_get_ioapic(kvm, &chip->chip.ioapic);
5417 break;
5418 default:
5419 r = -EINVAL;
5420 break;
5421 }
5422 return r;
5423}
5424
5425static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5426{
5427 struct kvm_pic *pic = kvm->arch.vpic;
5428 int r;
5429
5430 r = 0;
5431 switch (chip->chip_id) {
5432 case KVM_IRQCHIP_PIC_MASTER:
5433 spin_lock(&pic->lock);
5434 memcpy(&pic->pics[0], &chip->chip.pic,
5435 sizeof(struct kvm_pic_state));
5436 spin_unlock(&pic->lock);
5437 break;
5438 case KVM_IRQCHIP_PIC_SLAVE:
5439 spin_lock(&pic->lock);
5440 memcpy(&pic->pics[1], &chip->chip.pic,
5441 sizeof(struct kvm_pic_state));
5442 spin_unlock(&pic->lock);
5443 break;
5444 case KVM_IRQCHIP_IOAPIC:
5445 kvm_set_ioapic(kvm, &chip->chip.ioapic);
5446 break;
5447 default:
5448 r = -EINVAL;
5449 break;
5450 }
5451 kvm_pic_update_irq(pic);
5452 return r;
5453}
5454
5455static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5456{
5457 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
5458
5459 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
5460
5461 mutex_lock(&kps->lock);
5462 memcpy(ps, &kps->channels, sizeof(*ps));
5463 mutex_unlock(&kps->lock);
5464 return 0;
5465}
5466
5467static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5468{
5469 int i;
5470 struct kvm_pit *pit = kvm->arch.vpit;
5471
5472 mutex_lock(&pit->pit_state.lock);
5473 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
5474 for (i = 0; i < 3; i++)
5475 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
5476 mutex_unlock(&pit->pit_state.lock);
5477 return 0;
5478}
5479
5480static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5481{
5482 mutex_lock(&kvm->arch.vpit->pit_state.lock);
5483 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
5484 sizeof(ps->channels));
5485 ps->flags = kvm->arch.vpit->pit_state.flags;
5486 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
5487 memset(&ps->reserved, 0, sizeof(ps->reserved));
5488 return 0;
5489}
5490
5491static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5492{
5493 int start = 0;
5494 int i;
5495 u32 prev_legacy, cur_legacy;
5496 struct kvm_pit *pit = kvm->arch.vpit;
5497
5498 mutex_lock(&pit->pit_state.lock);
5499 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
5500 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
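 /* Restart channel 0 only when HPET legacy routing is being turned on. */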
5501 if (!prev_legacy && cur_legacy)
5502 start = 1;
5503 memcpy(&pit->pit_state.channels, &ps->channels,
5504 sizeof(pit->pit_state.channels));
5505 pit->pit_state.flags = ps->flags;
5506 for (i = 0; i < 3; i++)
5507 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
5508 start && i == 0);
5509 mutex_unlock(&pit->pit_state.lock);
5510 return 0;
5511}
5512
5513static int kvm_vm_ioctl_reinject(struct kvm *kvm,
5514 struct kvm_reinject_control *control)
5515{
5516 struct kvm_pit *pit = kvm->arch.vpit;
5517
5518 /*
5519 * pit_state.lock is (ab)used here so that concurrent
5520 * KVM_REINJECT_CONTROL callers cannot leave inconsistent state.
5521 */
5522 mutex_lock(&pit->pit_state.lock);
5523 kvm_pit_set_reinject(pit, control->pit_reinject);
5524 mutex_unlock(&pit->pit_state.lock);
5525
5526 return 0;
5527}
5528
5529void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
5530{
5531 /*
5532 * Flush each vCPU's buffered dirty-page state (e.g. the PML buffer)
5533 * into the dirty bitmap before it is reported to userspace.  The
5534 * buffers are flushed on every VM-Exit, so kicking the running vCPUs
5535 * to force an exit is sufficient.
5536 */
5538 struct kvm_vcpu *vcpu;
5539 int i;
5540
5541 kvm_for_each_vcpu(i, vcpu, kvm)
5542 kvm_vcpu_kick(vcpu);
5543}
5544
5545int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
5546 bool line_status)
5547{
5548 if (!irqchip_in_kernel(kvm))
5549 return -ENXIO;
5550
5551 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
5552 irq_event->irq, irq_event->level,
5553 line_status);
5554 return 0;
5555}
5556
5557int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
5558 struct kvm_enable_cap *cap)
5559{
5560 int r;
5561
5562 if (cap->flags)
5563 return -EINVAL;
5564
5565 switch (cap->cap) {
5566 case KVM_CAP_DISABLE_QUIRKS:
5567 kvm->arch.disabled_quirks = cap->args[0];
5568 r = 0;
5569 break;
5570 case KVM_CAP_SPLIT_IRQCHIP: {
5571 mutex_lock(&kvm->lock);
5572 r = -EINVAL;
5573 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
5574 goto split_irqchip_unlock;
5575 r = -EEXIST;
5576 if (irqchip_in_kernel(kvm))
5577 goto split_irqchip_unlock;
5578 if (kvm->created_vcpus)
5579 goto split_irqchip_unlock;
5580 r = kvm_setup_empty_irq_routing(kvm);
5581 if (r)
5582 goto split_irqchip_unlock;
5583 /* Pairs with irqchip_in_kernel(): publish the routing table first. */
5584 smp_wmb();
5585 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
5586 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
5587 r = 0;
5588split_irqchip_unlock:
5589 mutex_unlock(&kvm->lock);
5590 break;
5591 }
5592 case KVM_CAP_X2APIC_API:
5593 r = -EINVAL;
5594 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
5595 break;
5596
5597 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
5598 kvm->arch.x2apic_format = true;
5599 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
5600 kvm->arch.x2apic_broadcast_quirk_disabled = true;
5601
5602 r = 0;
5603 break;
5604 case KVM_CAP_X86_DISABLE_EXITS:
5605 r = -EINVAL;
5606 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
5607 break;
5608
5609 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
5610 kvm_can_mwait_in_guest())
5611 kvm->arch.mwait_in_guest = true;
5612 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
5613 kvm->arch.hlt_in_guest = true;
5614 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
5615 kvm->arch.pause_in_guest = true;
5616 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
5617 kvm->arch.cstate_in_guest = true;
5618 r = 0;
5619 break;
5620 case KVM_CAP_MSR_PLATFORM_INFO:
5621 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
5622 r = 0;
5623 break;
5624 case KVM_CAP_EXCEPTION_PAYLOAD:
5625 kvm->arch.exception_payload_enabled = cap->args[0];
5626 r = 0;
5627 break;
5628 case KVM_CAP_X86_USER_SPACE_MSR:
5629 kvm->arch.user_space_msr_mask = cap->args[0];
5630 r = 0;
5631 break;
5632 case KVM_CAP_X86_BUS_LOCK_EXIT:
5633 r = -EINVAL;
5634 if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE)
5635 break;
5636
5637 if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) &&
5638 (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
5639 break;
5640
5641 if (kvm_has_bus_lock_exit &&
5642 cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
5643 kvm->arch.bus_lock_detection_enabled = true;
5644 r = 0;
5645 break;
5646#ifdef CONFIG_X86_SGX_KVM
5647 case KVM_CAP_SGX_ATTRIBUTE: {
5648 unsigned long allowed_attributes = 0;
5649
5650 r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
5651 if (r)
5652 break;
5653
5654 /* Only the PROVISIONKEY attribute may be granted through this capability. */
5655 if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
5656 !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
5657 kvm->arch.sgx_provisioning_allowed = true;
5658 else
5659 r = -EINVAL;
5660 break;
5661 }
5662#endif
5663 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
5664 r = -EINVAL;
5665 if (kvm_x86_ops.vm_copy_enc_context_from)
5666 r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
5667 return r;
5668 case KVM_CAP_EXIT_HYPERCALL:
5669 if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
5670 r = -EINVAL;
5671 break;
5672 }
5673 kvm->arch.hypercall_exit_enabled = cap->args[0];
5674 r = 0;
5675 break;
5676 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
5677 r = -EINVAL;
5678 if (cap->args[0] & ~1)
5679 break;
5680 kvm->arch.exit_on_emulation_error = cap->args[0];
5681 r = 0;
5682 break;
5683 default:
5684 r = -EINVAL;
5685 break;
5686 }
5687 return r;
5688}
5689
5690static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
5691{
5692 struct kvm_x86_msr_filter *msr_filter;
5693
5694 msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
5695 if (!msr_filter)
5696 return NULL;
5697
5698 msr_filter->default_allow = default_allow;
5699 return msr_filter;
5700}
5701
5702static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
5703{
5704 u32 i;
5705
5706 if (!msr_filter)
5707 return;
5708
5709 for (i = 0; i < msr_filter->count; i++)
5710 kfree(msr_filter->ranges[i].bitmap);
5711
5712 kfree(msr_filter);
5713}
5714
5715static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
5716 struct kvm_msr_filter_range *user_range)
5717{
5718 unsigned long *bitmap = NULL;
5719 size_t bitmap_size;
5720
5721 if (!user_range->nmsrs)
5722 return 0;
5723
5724 if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
5725 return -EINVAL;
5726
5727 if (!user_range->flags)
5728 return -EINVAL;
5729
5730 bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
5731 if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
5732 return -EINVAL;
5733
5734 bitmap = memdup_user((u8 __user *)user_range->bitmap, bitmap_size);
5735 if (IS_ERR(bitmap))
5736 return PTR_ERR(bitmap);
5737
5738 msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
5739 .flags = user_range->flags,
5740 .base = user_range->base,
5741 .nmsrs = user_range->nmsrs,
5742 .bitmap = bitmap,
5743 };
5744
5745 msr_filter->count++;
5746 return 0;
5747}
5748
5749static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
5750{
5751 struct kvm_msr_filter __user *user_msr_filter = argp;
5752 struct kvm_x86_msr_filter *new_filter, *old_filter;
5753 struct kvm_msr_filter filter;
5754 bool default_allow;
5755 bool empty = true;
5756 int r = 0;
5757 u32 i;
5758
5759 if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
5760 return -EFAULT;
5761
5762 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
5763 empty &= !filter.ranges[i].nmsrs;
5764
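 /* An empty filter that denies by default would block every MSR access. */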
5765 default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
5766 if (empty && !default_allow)
5767 return -EINVAL;
5768
5769 new_filter = kvm_alloc_msr_filter(default_allow);
5770 if (!new_filter)
5771 return -ENOMEM;
5772
5773 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
5774 r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
5775 if (r) {
5776 kvm_free_msr_filter(new_filter);
5777 return r;
5778 }
5779 }
5780
5781 mutex_lock(&kvm->lock);
5782
5783 /* kvm->arch.msr_filter is protected by kvm->lock, which is held here. */
5784 old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
5785
5786 rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
5787 synchronize_srcu(&kvm->srcu);
5788
5789 kvm_free_msr_filter(old_filter);
5790
5791 kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
5792 mutex_unlock(&kvm->lock);
5793
5794 return 0;
5795}
5796
5797#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
5798static int kvm_arch_suspend_notifier(struct kvm *kvm)
5799{
5800 struct kvm_vcpu *vcpu;
5801 int i, ret = 0;
5802
5803 mutex_lock(&kvm->lock);
5804 kvm_for_each_vcpu(i, vcpu, kvm) {
5805 if (!vcpu->arch.pv_time_enabled)
5806 continue;
5807
5808 ret = kvm_set_guest_paused(vcpu);
5809 if (ret) {
5810 kvm_err("Failed to pause guest VCPU%d: %d\n",
5811 vcpu->vcpu_id, ret);
5812 break;
5813 }
5814 }
5815 mutex_unlock(&kvm->lock);
5816
5817 return ret ? NOTIFY_BAD : NOTIFY_DONE;
5818}
5819
5820int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
5821{
5822 switch (state) {
5823 case PM_HIBERNATION_PREPARE:
5824 case PM_SUSPEND_PREPARE:
5825 return kvm_arch_suspend_notifier(kvm);
5826 }
5827
5828 return NOTIFY_DONE;
5829}
5830#endif
5831
5832long kvm_arch_vm_ioctl(struct file *filp,
5833 unsigned int ioctl, unsigned long arg)
5834{
5835 struct kvm *kvm = filp->private_data;
5836 void __user *argp = (void __user *)arg;
5837 int r = -ENOTTY;
5838
5839 /*
5840 * The PIT state buffers below are never needed at the same time, so
5841 * the union makes their shared stack usage explicit.
5842 */
5843 union {
5844 struct kvm_pit_state ps;
5845 struct kvm_pit_state2 ps2;
5846 struct kvm_pit_config pit_config;
5847 } u;
5848
5849 switch (ioctl) {
5850 case KVM_SET_TSS_ADDR:
5851 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
5852 break;
5853 case KVM_SET_IDENTITY_MAP_ADDR: {
5854 u64 ident_addr;
5855
5856 mutex_lock(&kvm->lock);
5857 r = -EINVAL;
5858 if (kvm->created_vcpus)
5859 goto set_identity_unlock;
5860 r = -EFAULT;
5861 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
5862 goto set_identity_unlock;
5863 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
5864set_identity_unlock:
5865 mutex_unlock(&kvm->lock);
5866 break;
5867 }
5868 case KVM_SET_NR_MMU_PAGES:
5869 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
5870 break;
5871 case KVM_GET_NR_MMU_PAGES:
5872 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
5873 break;
5874 case KVM_CREATE_IRQCHIP: {
5875 mutex_lock(&kvm->lock);
5876
5877 r = -EEXIST;
5878 if (irqchip_in_kernel(kvm))
5879 goto create_irqchip_unlock;
5880
5881 r = -EINVAL;
5882 if (kvm->created_vcpus)
5883 goto create_irqchip_unlock;
5884
5885 r = kvm_pic_init(kvm);
5886 if (r)
5887 goto create_irqchip_unlock;
5888
5889 r = kvm_ioapic_init(kvm);
5890 if (r) {
5891 kvm_pic_destroy(kvm);
5892 goto create_irqchip_unlock;
5893 }
5894
5895 r = kvm_setup_default_irq_routing(kvm);
5896 if (r) {
5897 kvm_ioapic_destroy(kvm);
5898 kvm_pic_destroy(kvm);
5899 goto create_irqchip_unlock;
5900 }
5901 /* Write kvm->irq_routing before enabling irqchip_in_kernel(). */
5902 smp_wmb();
5903 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
5904 create_irqchip_unlock:
5905 mutex_unlock(&kvm->lock);
5906 break;
5907 }
5908 case KVM_CREATE_PIT:
5909 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
5910 goto create_pit;
5911 case KVM_CREATE_PIT2:
5912 r = -EFAULT;
5913 if (copy_from_user(&u.pit_config, argp,
5914 sizeof(struct kvm_pit_config)))
5915 goto out;
5916 create_pit:
5917 mutex_lock(&kvm->lock);
5918 r = -EEXIST;
5919 if (kvm->arch.vpit)
5920 goto create_pit_unlock;
5921 r = -ENOMEM;
5922 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
5923 if (kvm->arch.vpit)
5924 r = 0;
5925 create_pit_unlock:
5926 mutex_unlock(&kvm->lock);
5927 break;
5928 case KVM_GET_IRQCHIP: {
5929 /* chip_id: 0 = PIC master, 1 = PIC slave, 2 = IOAPIC */
5930 struct kvm_irqchip *chip;
5931
5932 chip = memdup_user(argp, sizeof(*chip));
5933 if (IS_ERR(chip)) {
5934 r = PTR_ERR(chip);
5935 goto out;
5936 }
5937
5938 r = -ENXIO;
5939 if (!irqchip_kernel(kvm))
5940 goto get_irqchip_out;
5941 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
5942 if (r)
5943 goto get_irqchip_out;
5944 r = -EFAULT;
5945 if (copy_to_user(argp, chip, sizeof(*chip)))
5946 goto get_irqchip_out;
5947 r = 0;
5948 get_irqchip_out:
5949 kfree(chip);
5950 break;
5951 }
5952 case KVM_SET_IRQCHIP: {
5953 /* chip_id: 0 = PIC master, 1 = PIC slave, 2 = IOAPIC */
5954 struct kvm_irqchip *chip;
5955
5956 chip = memdup_user(argp, sizeof(*chip));
5957 if (IS_ERR(chip)) {
5958 r = PTR_ERR(chip);
5959 goto out;
5960 }
5961
5962 r = -ENXIO;
5963 if (!irqchip_kernel(kvm))
5964 goto set_irqchip_out;
5965 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
5966 set_irqchip_out:
5967 kfree(chip);
5968 break;
5969 }
5970 case KVM_GET_PIT: {
5971 r = -EFAULT;
5972 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
5973 goto out;
5974 r = -ENXIO;
5975 if (!kvm->arch.vpit)
5976 goto out;
5977 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
5978 if (r)
5979 goto out;
5980 r = -EFAULT;
5981 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
5982 goto out;
5983 r = 0;
5984 break;
5985 }
5986 case KVM_SET_PIT: {
5987 r = -EFAULT;
5988 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
5989 goto out;
5990 mutex_lock(&kvm->lock);
5991 r = -ENXIO;
5992 if (!kvm->arch.vpit)
5993 goto set_pit_out;
5994 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
5995set_pit_out:
5996 mutex_unlock(&kvm->lock);
5997 break;
5998 }
5999 case KVM_GET_PIT2: {
6000 r = -ENXIO;
6001 if (!kvm->arch.vpit)
6002 goto out;
6003 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
6004 if (r)
6005 goto out;
6006 r = -EFAULT;
6007 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
6008 goto out;
6009 r = 0;
6010 break;
6011 }
6012 case KVM_SET_PIT2: {
6013 r = -EFAULT;
6014 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
6015 goto out;
6016 mutex_lock(&kvm->lock);
6017 r = -ENXIO;
6018 if (!kvm->arch.vpit)
6019 goto set_pit2_out;
6020 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
6021set_pit2_out:
6022 mutex_unlock(&kvm->lock);
6023 break;
6024 }
6025 case KVM_REINJECT_CONTROL: {
6026 struct kvm_reinject_control control;
6027 r = -EFAULT;
6028 if (copy_from_user(&control, argp, sizeof(control)))
6029 goto out;
6030 r = -ENXIO;
6031 if (!kvm->arch.vpit)
6032 goto out;
6033 r = kvm_vm_ioctl_reinject(kvm, &control);
6034 break;
6035 }
6036 case KVM_SET_BOOT_CPU_ID:
6037 r = 0;
6038 mutex_lock(&kvm->lock);
6039 if (kvm->created_vcpus)
6040 r = -EBUSY;
6041 else
6042 kvm->arch.bsp_vcpu_id = arg;
6043 mutex_unlock(&kvm->lock);
6044 break;
6045#ifdef CONFIG_KVM_XEN
6046 case KVM_XEN_HVM_CONFIG: {
6047 struct kvm_xen_hvm_config xhc;
6048 r = -EFAULT;
6049 if (copy_from_user(&xhc, argp, sizeof(xhc)))
6050 goto out;
6051 r = kvm_xen_hvm_config(kvm, &xhc);
6052 break;
6053 }
6054 case KVM_XEN_HVM_GET_ATTR: {
6055 struct kvm_xen_hvm_attr xha;
6056
6057 r = -EFAULT;
6058 if (copy_from_user(&xha, argp, sizeof(xha)))
6059 goto out;
6060 r = kvm_xen_hvm_get_attr(kvm, &xha);
6061 if (!r && copy_to_user(argp, &xha, sizeof(xha)))
6062 r = -EFAULT;
6063 break;
6064 }
6065 case KVM_XEN_HVM_SET_ATTR: {
6066 struct kvm_xen_hvm_attr xha;
6067
6068 r = -EFAULT;
6069 if (copy_from_user(&xha, argp, sizeof(xha)))
6070 goto out;
6071 r = kvm_xen_hvm_set_attr(kvm, &xha);
6072 break;
6073 }
6074#endif
6075 case KVM_SET_CLOCK: {
6076 struct kvm_arch *ka = &kvm->arch;
6077 struct kvm_clock_data user_ns;
6078 u64 now_ns;
6079
6080 r = -EFAULT;
6081 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
6082 goto out;
6083
6084 r = -EINVAL;
6085 if (user_ns.flags)
6086 goto out;
6087
6088 r = 0;
6089
6090 /*
6091 * Refresh the masterclock so the new kvmclock offset below is
6092 * computed against an up-to-date host time sample.
6093 */
6094 kvm_gen_update_masterclock(kvm);
6095
6096 /*
6097 * Pairs with kvm_guest_time_update(): when the masterclock is in use
6098 * the guest's system_time is derived from master_kernel_ns +
6099 * kvmclock_offset, so compute the new offset against master_kernel_ns
6100 * rather than the slightly newer value from get_kvmclock_base_ns().
6101 */
6103 raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
6104 if (kvm->arch.use_master_clock)
6105 now_ns = ka->master_kernel_ns;
6106 else
6107 now_ns = get_kvmclock_base_ns();
6108 ka->kvmclock_offset = user_ns.clock - now_ns;
6109 raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
6110
6111 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
6112 break;
6113 }
6114 case KVM_GET_CLOCK: {
6115 struct kvm_clock_data user_ns;
6116 u64 now_ns;
6117
6118 now_ns = get_kvmclock_ns(kvm);
6119 user_ns.clock = now_ns;
6120 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
6121 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
6122
6123 r = -EFAULT;
6124 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
6125 goto out;
6126 r = 0;
6127 break;
6128 }
6129 case KVM_MEMORY_ENCRYPT_OP: {
6130 r = -ENOTTY;
6131 if (kvm_x86_ops.mem_enc_op)
6132 r = static_call(kvm_x86_mem_enc_op)(kvm, argp);
6133 break;
6134 }
6135 case KVM_MEMORY_ENCRYPT_REG_REGION: {
6136 struct kvm_enc_region region;
6137
6138 r = -EFAULT;
6139 if (copy_from_user(&region, argp, sizeof(region)))
6140 goto out;
6141
6142 r = -ENOTTY;
6143 if (kvm_x86_ops.mem_enc_reg_region)
6144 r = static_call(kvm_x86_mem_enc_reg_region)(kvm, &region);
6145 break;
6146 }
6147 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
6148 struct kvm_enc_region region;
6149
6150 r = -EFAULT;
6151 if (copy_from_user(&region, argp, sizeof(region)))
6152 goto out;
6153
6154 r = -ENOTTY;
6155 if (kvm_x86_ops.mem_enc_unreg_region)
6156 r = static_call(kvm_x86_mem_enc_unreg_region)(kvm, &region);
6157 break;
6158 }
6159 case KVM_HYPERV_EVENTFD: {
6160 struct kvm_hyperv_eventfd hvevfd;
6161
6162 r = -EFAULT;
6163 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
6164 goto out;
6165 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
6166 break;
6167 }
6168 case KVM_SET_PMU_EVENT_FILTER:
6169 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
6170 break;
6171 case KVM_X86_SET_MSR_FILTER:
6172 r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
6173 break;
6174 default:
6175 r = -ENOTTY;
6176 }
6177out:
6178 return r;
6179}
6180
6181static void kvm_init_msr_list(void)
6182{
6183 struct x86_pmu_capability x86_pmu;
6184 u32 dummy[2];
6185 unsigned i;
6186
6187 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
6188 "Please update the fixed PMCs in msrs_to_save_all[]");
6189
6190 perf_get_x86_pmu_capability(&x86_pmu);
6191
6192 num_msrs_to_save = 0;
6193 num_emulated_msrs = 0;
6194 num_msr_based_features = 0;
6195
6196 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
6197 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
6198 continue;
6199
6200 /*
6201 * Even MSRs that are valid on the host may not be exposable to
6202 * guests in every configuration; filter them below.
6203 */
6204 switch (msrs_to_save_all[i]) {
6205 case MSR_IA32_BNDCFGS:
6206 if (!kvm_mpx_supported())
6207 continue;
6208 break;
6209 case MSR_TSC_AUX:
6210 if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
6211 !kvm_cpu_cap_has(X86_FEATURE_RDPID))
6212 continue;
6213 break;
6214 case MSR_IA32_UMWAIT_CONTROL:
6215 if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
6216 continue;
6217 break;
6218 case MSR_IA32_RTIT_CTL:
6219 case MSR_IA32_RTIT_STATUS:
6220 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
6221 continue;
6222 break;
6223 case MSR_IA32_RTIT_CR3_MATCH:
6224 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6225 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
6226 continue;
6227 break;
6228 case MSR_IA32_RTIT_OUTPUT_BASE:
6229 case MSR_IA32_RTIT_OUTPUT_MASK:
6230 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6231 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
6232 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
6233 continue;
6234 break;
6235 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
6236 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6237 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
6238 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
6239 continue;
6240 break;
6241 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
6242 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
6243 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
6244 continue;
6245 break;
6246 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
6247 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
6248 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
6249 continue;
6250 break;
6251 default:
6252 break;
6253 }
6254
6255 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
6256 }
6257
6258 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
6259 if (!static_call(kvm_x86_has_emulated_msr)(NULL, emulated_msrs_all[i]))
6260 continue;
6261
6262 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
6263 }
6264
6265 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
6266 struct kvm_msr_entry msr;
6267
6268 msr.index = msr_based_features_all[i];
6269 if (kvm_get_msr_feature(&msr))
6270 continue;
6271
6272 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
6273 }
6274}
6275
6276static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
6277 const void *v)
6278{
6279 int handled = 0;
6280 int n;
6281
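 /*
  * Write in chunks of at most 8 bytes, offering each chunk to the
  * in-kernel LAPIC first and then to the MMIO bus; stop at the first
  * chunk that nobody claims and report how much was handled.
  */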
6282 do {
6283 n = min(len, 8);
6284 if (!(lapic_in_kernel(vcpu) &&
6285 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
6286 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
6287 break;
6288 handled += n;
6289 addr += n;
6290 len -= n;
6291 v += n;
6292 } while (len);
6293
6294 return handled;
6295}
6296
6297static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
6298{
6299 int handled = 0;
6300 int n;
6301
6302 do {
6303 n = min(len, 8);
6304 if (!(lapic_in_kernel(vcpu) &&
6305 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
6306 addr, n, v))
6307 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
6308 break;
6309 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
6310 handled += n;
6311 addr += n;
6312 len -= n;
6313 v += n;
6314 } while (len);
6315
6316 return handled;
6317}
6318
6319static void kvm_set_segment(struct kvm_vcpu *vcpu,
6320 struct kvm_segment *var, int seg)
6321{
6322 static_call(kvm_x86_set_segment)(vcpu, var, seg);
6323}
6324
6325void kvm_get_segment(struct kvm_vcpu *vcpu,
6326 struct kvm_segment *var, int seg)
6327{
6328 static_call(kvm_x86_get_segment)(vcpu, var, seg);
6329}
6330
6331gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
6332 struct x86_exception *exception)
6333{
6334 gpa_t t_gpa;
6335
6336 BUG_ON(!mmu_is_nested(vcpu));
6337
6338 /* Walks of the nested page tables are treated as user-mode accesses. */
6339 access |= PFERR_USER_MASK;
6340 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
6341
6342 return t_gpa;
6343}
6344
6345gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
6346 struct x86_exception *exception)
6347{
6348 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6349 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6350}
6351EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
6352
6353gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
6354 struct x86_exception *exception)
6355{
6356 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6357 access |= PFERR_FETCH_MASK;
6358 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6359}
6360
6361gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
6362 struct x86_exception *exception)
6363{
6364 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6365 access |= PFERR_WRITE_MASK;
6366 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6367}
6368EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
6369
6370
6371gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
6372 struct x86_exception *exception)
6373{
6374 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
6375}
6376
6377static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
6378 struct kvm_vcpu *vcpu, u32 access,
6379 struct x86_exception *exception)
6380{
6381 void *data = val;
6382 int r = X86EMUL_CONTINUE;
6383
6384 while (bytes) {
6385 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
6386 exception);
6387 unsigned offset = addr & (PAGE_SIZE-1);
6388 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
6389 int ret;
6390
6391 if (gpa == UNMAPPED_GVA)
6392 return X86EMUL_PROPAGATE_FAULT;
6393 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
6394 offset, toread);
6395 if (ret < 0) {
6396 r = X86EMUL_IO_NEEDED;
6397 goto out;
6398 }
6399
6400 bytes -= toread;
6401 data += toread;
6402 addr += toread;
6403 }
6404out:
6405 return r;
6406}
6407
6408
6409static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
6410 gva_t addr, void *val, unsigned int bytes,
6411 struct x86_exception *exception)
6412{
6413 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6414 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6415 unsigned offset;
6416 int ret;
6417
6418 /* Open-coded, single-page version of kvm_read_guest_virt_helper() for speed. */
6419 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
6420 exception);
6421 if (unlikely(gpa == UNMAPPED_GVA))
6422 return X86EMUL_PROPAGATE_FAULT;
6423
6424 offset = addr & (PAGE_SIZE-1);
6425 if (WARN_ON(offset + bytes > PAGE_SIZE))
6426 bytes = (unsigned)PAGE_SIZE - offset;
6427 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
6428 offset, bytes);
6429 if (unlikely(ret < 0))
6430 return X86EMUL_IO_NEEDED;
6431
6432 return X86EMUL_CONTINUE;
6433}
6434
6435int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
6436 gva_t addr, void *val, unsigned int bytes,
6437 struct x86_exception *exception)
6438{
6439 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6440
6441 /*
6442 * Callers may consume *exception even when no fault was signalled
6443 * (e.g. on X86EMUL_IO_NEEDED) and pass it on to
6444 * kvm_inject_page_fault(), so zero it here to avoid leaking stale
6445 * kernel stack contents.
6446 */
6447 memset(exception, 0, sizeof(*exception));
6448 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
6449 exception);
6450}
6451EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
6452
6453static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
6454 gva_t addr, void *val, unsigned int bytes,
6455 struct x86_exception *exception, bool system)
6456{
6457 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6458 u32 access = 0;
6459
6460 if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
6461 access |= PFERR_USER_MASK;
6462
6463 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
6464}
6465
6466static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
6467 unsigned long addr, void *val, unsigned int bytes)
6468{
6469 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6470 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
6471
6472 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
6473}
6474
6475static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
6476 struct kvm_vcpu *vcpu, u32 access,
6477 struct x86_exception *exception)
6478{
6479 void *data = val;
6480 int r = X86EMUL_CONTINUE;
6481
6482 while (bytes) {
6483 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
6484 access,
6485 exception);
6486 unsigned offset = addr & (PAGE_SIZE-1);
6487 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
6488 int ret;
6489
6490 if (gpa == UNMAPPED_GVA)
6491 return X86EMUL_PROPAGATE_FAULT;
6492 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
6493 if (ret < 0) {
6494 r = X86EMUL_IO_NEEDED;
6495 goto out;
6496 }
6497
6498 bytes -= towrite;
6499 data += towrite;
6500 addr += towrite;
6501 }
6502out:
6503 return r;
6504}
6505
6506static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
6507 unsigned int bytes, struct x86_exception *exception,
6508 bool system)
6509{
6510 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6511 u32 access = PFERR_WRITE_MASK;
6512
6513 if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
6514 access |= PFERR_USER_MASK;
6515
6516 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6517 access, exception);
6518}
6519
6520int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
6521 unsigned int bytes, struct x86_exception *exception)
6522{
6523 /* System writes can pull in lots of guest pages; request an L1D flush (L1TF). */
6524 vcpu->arch.l1tf_flush_l1d = true;
6525
6526 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6527 PFERR_WRITE_MASK, exception);
6528}
6529EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
6530
6531int handle_ud(struct kvm_vcpu *vcpu)
6532{
6533 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
6534 int emul_type = EMULTYPE_TRAP_UD;
6535 char sig[5];
6536 struct x86_exception e;
6537
6538 if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
6539 return 1;
6540
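 /*
  * If the forced-emulation prefix is enabled and present at RIP, skip
  * over it and emulate the following instruction unconditionally.
  */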
6541 if (force_emulation_prefix &&
6542 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
6543 sig, sizeof(sig), &e) == 0 &&
6544 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
6545 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
6546 emul_type = EMULTYPE_TRAP_UD_FORCED;
6547 }
6548
6549 return kvm_emulate_instruction(vcpu, emul_type);
6550}
6551EXPORT_SYMBOL_GPL(handle_ud);
6552
6553static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6554 gpa_t gpa, bool write)
6555{
6556 /* Accesses to the default APIC base address are always handled as MMIO. */
6557 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6558 return 1;
6559
6560 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
6561 trace_vcpu_match_mmio(gva, gpa, write, true);
6562 return 1;
6563 }
6564
6565 return 0;
6566}
6567
6568static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6569 gpa_t *gpa, struct x86_exception *exception,
6570 bool write)
6571{
6572 u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
6573 | (write ? PFERR_WRITE_MASK : 0);
6574
6575 /*
6576 * Fast path: reuse the cached GVA->GPA MMIO translation if it is
6577 * still valid, i.e. paging is disabled or the cached access bits
6578 * still pass the permission check for this access.
6579 */
6580 if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
6581 !permission_fault(vcpu, vcpu->arch.walk_mmu,
6582 vcpu->arch.mmio_access, 0, access))) {
6583 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
6584 (gva & (PAGE_SIZE - 1));
6585 trace_vcpu_match_mmio(gva, *gpa, write, false);
6586 return 1;
6587 }
6588
6589 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6590
6591 if (*gpa == UNMAPPED_GVA)
6592 return -1;
6593
6594 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
6595}
6596
6597int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
6598 const void *val, int bytes)
6599{
6600 int ret;
6601
6602 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
6603 if (ret < 0)
6604 return 0;
6605 kvm_page_track_write(vcpu, gpa, val, bytes);
6606 return 1;
6607}
6608
6609struct read_write_emulator_ops {
6610 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
6611 int bytes);
6612 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
6613 void *val, int bytes);
6614 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6615 int bytes, void *val);
6616 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6617 void *val, int bytes);
6618 bool write;
6619};
6620
6621static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
6622{
6623 if (vcpu->mmio_read_completed) {
6624 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
6625 vcpu->mmio_fragments[0].gpa, val);
6626 vcpu->mmio_read_completed = 0;
6627 return 1;
6628 }
6629
6630 return 0;
6631}
6632
6633static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
6634 void *val, int bytes)
6635{
6636 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
6637}
6638
6639static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
6640 void *val, int bytes)
6641{
6642 return emulator_write_phys(vcpu, gpa, val, bytes);
6643}
6644
6645static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
6646{
6647 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
6648 return vcpu_mmio_write(vcpu, gpa, bytes, val);
6649}
6650
6651static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
6652 void *val, int bytes)
6653{
6654 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
6655 return X86EMUL_IO_NEEDED;
6656}
6657
6658static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
6659 void *val, int bytes)
6660{
6661 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
6662
6663 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
6664 return X86EMUL_CONTINUE;
6665}
6666
6667static const struct read_write_emulator_ops read_emultor = {
6668 .read_write_prepare = read_prepare,
6669 .read_write_emulate = read_emulate,
6670 .read_write_mmio = vcpu_mmio_read,
6671 .read_write_exit_mmio = read_exit_mmio,
6672};
6673
6674static const struct read_write_emulator_ops write_emultor = {
6675 .read_write_emulate = write_emulate,
6676 .read_write_mmio = write_mmio,
6677 .read_write_exit_mmio = write_exit_mmio,
6678 .write = true,
6679};
6680
6681static int emulator_read_write_onepage(unsigned long addr, void *val,
6682 unsigned int bytes,
6683 struct x86_exception *exception,
6684 struct kvm_vcpu *vcpu,
6685 const struct read_write_emulator_ops *ops)
6686{
6687 gpa_t gpa;
6688 int handled, ret;
6689 bool write = ops->write;
6690 struct kvm_mmio_fragment *frag;
6691 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6692
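	/*
	 * If a GPA is already available from the preceding page fault
	 * (e.g. an NPT/EPT violation) and its page offset matches the
	 * address being accessed, use it and skip the GVA->GPA walk.
	 */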
6700 if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
6701 (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
6702 gpa = ctxt->gpa_val;
6703 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
6704 } else {
6705 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
6706 if (ret < 0)
6707 return X86EMUL_PROPAGATE_FAULT;
6708 }
6709
6710 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
6711 return X86EMUL_CONTINUE;
6712
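	/*
	 * Let in-kernel MMIO devices handle as much of the access as they
	 * can; whatever remains becomes a fragment for userspace.
	 */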
6716 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
6717 if (handled == bytes)
6718 return X86EMUL_CONTINUE;
6719
6720 gpa += handled;
6721 bytes -= handled;
6722 val += handled;
6723
6724 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
6725 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
6726 frag->gpa = gpa;
6727 frag->data = val;
6728 frag->len = bytes;
6729 return X86EMUL_CONTINUE;
6730}
6731
6732static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
6733 unsigned long addr,
6734 void *val, unsigned int bytes,
6735 struct x86_exception *exception,
6736 const struct read_write_emulator_ops *ops)
6737{
6738 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6739 gpa_t gpa;
6740 int rc;
6741
6742 if (ops->read_write_prepare &&
6743 ops->read_write_prepare(vcpu, val, bytes))
6744 return X86EMUL_CONTINUE;
6745
6746 vcpu->mmio_nr_fragments = 0;
6747
6748
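	/* Split accesses that cross a page boundary. */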
6749 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
6750 int now;
6751
6752 now = -addr & ~PAGE_MASK;
6753 rc = emulator_read_write_onepage(addr, val, now, exception,
6754 vcpu, ops);
6755
6756 if (rc != X86EMUL_CONTINUE)
6757 return rc;
6758 addr += now;
6759 if (ctxt->mode != X86EMUL_MODE_PROT64)
6760 addr = (u32)addr;
6761 val += now;
6762 bytes -= now;
6763 }
6764
6765 rc = emulator_read_write_onepage(addr, val, bytes, exception,
6766 vcpu, ops);
6767 if (rc != X86EMUL_CONTINUE)
6768 return rc;
6769
6770 if (!vcpu->mmio_nr_fragments)
6771 return rc;
6772
6773 gpa = vcpu->mmio_fragments[0].gpa;
6774
6775 vcpu->mmio_needed = 1;
6776 vcpu->mmio_cur_fragment = 0;
6777
6778 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
6779 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
6780 vcpu->run->exit_reason = KVM_EXIT_MMIO;
6781 vcpu->run->mmio.phys_addr = gpa;
6782
6783 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
6784}
6785
6786static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
6787 unsigned long addr,
6788 void *val,
6789 unsigned int bytes,
6790 struct x86_exception *exception)
6791{
6792 return emulator_read_write(ctxt, addr, val, bytes,
6793 exception, &read_emultor);
6794}
6795
6796static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
6797 unsigned long addr,
6798 const void *val,
6799 unsigned int bytes,
6800 struct x86_exception *exception)
6801{
6802 return emulator_read_write(ctxt, addr, (void *)val, bytes,
6803 exception, &write_emultor);
6804}
6805
6806#define CMPXCHG_TYPE(t, ptr, old, new) \
6807 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
6808
6809#ifdef CONFIG_X86_64
6810# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
6811#else
6812# define CMPXCHG64(ptr, old, new) \
6813 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
6814#endif
6815
6816static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
6817 unsigned long addr,
6818 const void *old,
6819 const void *new,
6820 unsigned int bytes,
6821 struct x86_exception *exception)
6822{
6823 struct kvm_host_map map;
6824 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6825 u64 page_line_mask;
6826 gpa_t gpa;
6827 char *kaddr;
6828 bool exchanged;
6829
6830
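	/* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */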
6831 if (bytes > 8 || (bytes & (bytes - 1)))
6832 goto emul_write;
6833
6834 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
6835
6836 if (gpa == UNMAPPED_GVA ||
6837 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6838 goto emul_write;
6839
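	/*
	 * If split-lock detection is enabled on the host, an atomic that
	 * crosses a cache line would raise #AC, so fall back to emulating
	 * the exchange as a plain write; without it, only page-crossing
	 * accesses fall back.
	 */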
6844 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
6845 page_line_mask = ~(cache_line_size() - 1);
6846 else
6847 page_line_mask = PAGE_MASK;
6848
6849 if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
6850 goto emul_write;
6851
6852 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
6853 goto emul_write;
6854
6855 kaddr = map.hva + offset_in_page(gpa);
6856
6857 switch (bytes) {
6858 case 1:
6859 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
6860 break;
6861 case 2:
6862 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
6863 break;
6864 case 4:
6865 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
6866 break;
6867 case 8:
6868 exchanged = CMPXCHG64(kaddr, old, new);
6869 break;
6870 default:
6871 BUG();
6872 }
6873
6874 kvm_vcpu_unmap(vcpu, &map, true);
6875
6876 if (!exchanged)
6877 return X86EMUL_CMPXCHG_FAILED;
6878
6879 kvm_page_track_write(vcpu, gpa, new, bytes);
6880
6881 return X86EMUL_CONTINUE;
6882
6883emul_write:
6884 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
6885
6886 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
6887}
6888
6889static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
6890{
6891 int r = 0, i;
6892
6893 for (i = 0; i < vcpu->arch.pio.count; i++) {
6894 if (vcpu->arch.pio.in)
6895 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
6896 vcpu->arch.pio.size, pd);
6897 else
6898 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
6899 vcpu->arch.pio.port, vcpu->arch.pio.size,
6900 pd);
6901 if (r)
6902 break;
6903 pd += vcpu->arch.pio.size;
6904 }
6905 return r;
6906}
6907
6908static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
6909 unsigned short port,
6910 unsigned int count, bool in)
6911{
6912 vcpu->arch.pio.port = port;
6913 vcpu->arch.pio.in = in;
6914 vcpu->arch.pio.count = count;
6915 vcpu->arch.pio.size = size;
6916
6917 if (!kernel_pio(vcpu, vcpu->arch.pio_data))
6918 return 1;
6919
6920 vcpu->run->exit_reason = KVM_EXIT_IO;
6921 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
6922 vcpu->run->io.size = size;
6923 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
6924 vcpu->run->io.count = count;
6925 vcpu->run->io.port = port;
6926
6927 return 0;
6928}
6929
6930static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
6931 unsigned short port, unsigned int count)
6932{
6933 WARN_ON(vcpu->arch.pio.count);
6934 memset(vcpu->arch.pio_data, 0, size * count);
6935 return emulator_pio_in_out(vcpu, size, port, count, true);
6936}
6937
6938static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
6939{
6940 int size = vcpu->arch.pio.size;
6941 unsigned count = vcpu->arch.pio.count;
6942 memcpy(val, vcpu->arch.pio_data, size * count);
6943 trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
6944 vcpu->arch.pio.count = 0;
6945}
6946
6947static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
6948 unsigned short port, void *val, unsigned int count)
6949{
6950 if (vcpu->arch.pio.count) {
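		/*
		 * A previous IN on this port exited to userspace; the data it
		 * returned is now in pio_data and only needs to be copied out.
		 */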
6951
6952 } else {
6953 int r = __emulator_pio_in(vcpu, size, port, count);
6954 if (!r)
6955 return r;
6956
6957
6958 }
6959
6960 WARN_ON(count != vcpu->arch.pio.count);
6961 complete_emulator_pio_in(vcpu, val);
6962 return 1;
6963}
6964
6965static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
6966 int size, unsigned short port, void *val,
6967 unsigned int count)
6968{
6969 return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
6970
6971}
6972
6973static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
6974 unsigned short port, const void *val,
6975 unsigned int count)
6976{
6977 int ret;
6978
6979 memcpy(vcpu->arch.pio_data, val, size * count);
6980 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
6981 ret = emulator_pio_in_out(vcpu, size, port, count, false);
6982 if (ret)
6983 vcpu->arch.pio.count = 0;
6984
6985 return ret;
6986}
6987
6988static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
6989 int size, unsigned short port,
6990 const void *val, unsigned int count)
6991{
6992 return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
6993}
6994
6995static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
6996{
6997 return static_call(kvm_x86_get_segment_base)(vcpu, seg);
6998}
6999
7000static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
7001{
7002 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
7003}
7004
7005static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
7006{
7007 if (!need_emulate_wbinvd(vcpu))
7008 return X86EMUL_CONTINUE;
7009
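	/*
	 * If the CPU exits on WBINVD, perform it on every physical CPU this
	 * vCPU has run on (and may have dirtied); otherwise just execute
	 * WBINVD locally.
	 */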
7010 if (static_call(kvm_x86_has_wbinvd_exit)()) {
7011 int cpu = get_cpu();
7012
7013 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
7014 on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
7015 wbinvd_ipi, NULL, 1);
7016 put_cpu();
7017 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
7018 } else
7019 wbinvd();
7020 return X86EMUL_CONTINUE;
7021}
7022
7023int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
7024{
7025 kvm_emulate_wbinvd_noskip(vcpu);
7026 return kvm_skip_emulated_instruction(vcpu);
7027}
7028EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
7029
7030
7031
7032static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
7033{
7034 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
7035}
7036
7037static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
7038 unsigned long *dest)
7039{
7040 kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
7041}
7042
7043static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
7044 unsigned long value)
7045{
7046
7047 return kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
7048}
7049
7050static u64 mk_cr_64(u64 curr_cr, u32 new_val)
7051{
7052 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
7053}
7054
7055static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
7056{
7057 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7058 unsigned long value;
7059
7060 switch (cr) {
7061 case 0:
7062 value = kvm_read_cr0(vcpu);
7063 break;
7064 case 2:
7065 value = vcpu->arch.cr2;
7066 break;
7067 case 3:
7068 value = kvm_read_cr3(vcpu);
7069 break;
7070 case 4:
7071 value = kvm_read_cr4(vcpu);
7072 break;
7073 case 8:
7074 value = kvm_get_cr8(vcpu);
7075 break;
7076 default:
7077 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7078 return 0;
7079 }
7080
7081 return value;
7082}
7083
7084static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
7085{
7086 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7087 int res = 0;
7088
7089 switch (cr) {
7090 case 0:
7091 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
7092 break;
7093 case 2:
7094 vcpu->arch.cr2 = val;
7095 break;
7096 case 3:
7097 res = kvm_set_cr3(vcpu, val);
7098 break;
7099 case 4:
7100 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
7101 break;
7102 case 8:
7103 res = kvm_set_cr8(vcpu, val);
7104 break;
7105 default:
7106 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7107 res = -1;
7108 }
7109
7110 return res;
7111}
7112
7113static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
7114{
7115 return static_call(kvm_x86_get_cpl)(emul_to_vcpu(ctxt));
7116}
7117
7118static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7119{
7120 static_call(kvm_x86_get_gdt)(emul_to_vcpu(ctxt), dt);
7121}
7122
7123static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7124{
7125 static_call(kvm_x86_get_idt)(emul_to_vcpu(ctxt), dt);
7126}
7127
7128static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7129{
7130 static_call(kvm_x86_set_gdt)(emul_to_vcpu(ctxt), dt);
7131}
7132
7133static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7134{
7135 static_call(kvm_x86_set_idt)(emul_to_vcpu(ctxt), dt);
7136}
7137
7138static unsigned long emulator_get_cached_segment_base(
7139 struct x86_emulate_ctxt *ctxt, int seg)
7140{
7141 return get_segment_base(emul_to_vcpu(ctxt), seg);
7142}
7143
7144static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
7145 struct desc_struct *desc, u32 *base3,
7146 int seg)
7147{
7148 struct kvm_segment var;
7149
7150 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
7151 *selector = var.selector;
7152
7153 if (var.unusable) {
7154 memset(desc, 0, sizeof(*desc));
7155 if (base3)
7156 *base3 = 0;
7157 return false;
7158 }
7159
7160 if (var.g)
7161 var.limit >>= 12;
7162 set_desc_limit(desc, var.limit);
7163 set_desc_base(desc, (unsigned long)var.base);
7164#ifdef CONFIG_X86_64
7165 if (base3)
7166 *base3 = var.base >> 32;
7167#endif
7168 desc->type = var.type;
7169 desc->s = var.s;
7170 desc->dpl = var.dpl;
7171 desc->p = var.present;
7172 desc->avl = var.avl;
7173 desc->l = var.l;
7174 desc->d = var.db;
7175 desc->g = var.g;
7176
7177 return true;
7178}
7179
7180static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
7181 struct desc_struct *desc, u32 base3,
7182 int seg)
7183{
7184 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7185 struct kvm_segment var;
7186
7187 var.selector = selector;
7188 var.base = get_desc_base(desc);
7189#ifdef CONFIG_X86_64
7190 var.base |= ((u64)base3) << 32;
7191#endif
7192 var.limit = get_desc_limit(desc);
7193 if (desc->g)
7194 var.limit = (var.limit << 12) | 0xfff;
7195 var.type = desc->type;
7196 var.dpl = desc->dpl;
7197 var.db = desc->d;
7198 var.s = desc->s;
7199 var.l = desc->l;
7200 var.g = desc->g;
7201 var.avl = desc->avl;
7202 var.present = desc->p;
7203 var.unusable = !var.present;
7204 var.padding = 0;
7205
7206 kvm_set_segment(vcpu, &var, seg);
7207 return;
7208}
7209
7210static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
7211 u32 msr_index, u64 *pdata)
7212{
7213 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7214 int r;
7215
7216 r = kvm_get_msr(vcpu, msr_index, pdata);
7217
7218 if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
7219
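		/* Defer the RDMSR to userspace (KVM_EXIT_X86_RDMSR). */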
7220 return X86EMUL_IO_NEEDED;
7221 }
7222
7223 return r;
7224}
7225
7226static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
7227 u32 msr_index, u64 data)
7228{
7229 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7230 int r;
7231
7232 r = kvm_set_msr(vcpu, msr_index, data);
7233
7234 if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
7235
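		/* Defer the WRMSR to userspace (KVM_EXIT_X86_WRMSR). */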
7236 return X86EMUL_IO_NEEDED;
7237 }
7238
7239 return r;
7240}
7241
7242static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
7243{
7244 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7245
7246 return vcpu->arch.smbase;
7247}
7248
7249static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
7250{
7251 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7252
7253 vcpu->arch.smbase = smbase;
7254}
7255
7256static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
7257 u32 pmc)
7258{
7259 return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
7260}
7261
7262static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
7263 u32 pmc, u64 *pdata)
7264{
7265 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
7266}
7267
7268static void emulator_halt(struct x86_emulate_ctxt *ctxt)
7269{
7270 emul_to_vcpu(ctxt)->arch.halt_request = 1;
7271}
7272
7273static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
7274 struct x86_instruction_info *info,
7275 enum x86_intercept_stage stage)
7276{
7277 return static_call(kvm_x86_check_intercept)(emul_to_vcpu(ctxt), info, stage,
7278 &ctxt->exception);
7279}
7280
7281static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
7282 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
7283 bool exact_only)
7284{
7285 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
7286}
7287
7288static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
7289{
7290 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
7291}
7292
7293static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
7294{
7295 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
7296}
7297
7298static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
7299{
7300 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
7301}
7302
7303static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
7304{
7305 return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
7306}
7307
7308static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
7309{
7310 kvm_register_write_raw(emul_to_vcpu(ctxt), reg, val);
7311}
7312
7313static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
7314{
7315 static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
7316}
7317
7318static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
7319{
7320 return emul_to_vcpu(ctxt)->arch.hflags;
7321}
7322
7323static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
7324{
7325 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7326
7327 kvm_smm_changed(vcpu, false);
7328}
7329
7330static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
7331 const char *smstate)
7332{
7333 return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
7334}
7335
7336static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
7337{
7338 kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
7339}
7340
7341static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
7342{
7343 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
7344}
7345
7346static const struct x86_emulate_ops emulate_ops = {
7347 .read_gpr = emulator_read_gpr,
7348 .write_gpr = emulator_write_gpr,
7349 .read_std = emulator_read_std,
7350 .write_std = emulator_write_std,
7351 .read_phys = kvm_read_guest_phys_system,
7352 .fetch = kvm_fetch_guest_virt,
7353 .read_emulated = emulator_read_emulated,
7354 .write_emulated = emulator_write_emulated,
7355 .cmpxchg_emulated = emulator_cmpxchg_emulated,
7356 .invlpg = emulator_invlpg,
7357 .pio_in_emulated = emulator_pio_in_emulated,
7358 .pio_out_emulated = emulator_pio_out_emulated,
7359 .get_segment = emulator_get_segment,
7360 .set_segment = emulator_set_segment,
7361 .get_cached_segment_base = emulator_get_cached_segment_base,
7362 .get_gdt = emulator_get_gdt,
7363 .get_idt = emulator_get_idt,
7364 .set_gdt = emulator_set_gdt,
7365 .set_idt = emulator_set_idt,
7366 .get_cr = emulator_get_cr,
7367 .set_cr = emulator_set_cr,
7368 .cpl = emulator_get_cpl,
7369 .get_dr = emulator_get_dr,
7370 .set_dr = emulator_set_dr,
7371 .get_smbase = emulator_get_smbase,
7372 .set_smbase = emulator_set_smbase,
7373 .set_msr = emulator_set_msr,
7374 .get_msr = emulator_get_msr,
7375 .check_pmc = emulator_check_pmc,
7376 .read_pmc = emulator_read_pmc,
7377 .halt = emulator_halt,
7378 .wbinvd = emulator_wbinvd,
7379 .fix_hypercall = emulator_fix_hypercall,
7380 .intercept = emulator_intercept,
7381 .get_cpuid = emulator_get_cpuid,
7382 .guest_has_long_mode = emulator_guest_has_long_mode,
7383 .guest_has_movbe = emulator_guest_has_movbe,
7384 .guest_has_fxsr = emulator_guest_has_fxsr,
7385 .set_nmi_mask = emulator_set_nmi_mask,
7386 .get_hflags = emulator_get_hflags,
7387 .exiting_smm = emulator_exiting_smm,
7388 .leave_smm = emulator_leave_smm,
7389 .triple_fault = emulator_triple_fault,
7390 .set_xcr = emulator_set_xcr,
7391};
7392
7393static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
7394{
7395 u32 int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
7396
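	/*
	 * An interrupt shadow (from STI or MOV SS) only lasts for one
	 * instruction.  If the shadow was already active before this emulated
	 * instruction, don't re-arm it; and when the shadow is dropped,
	 * re-evaluate pending events.
	 */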
7403 if (int_shadow & mask)
7404 mask = 0;
7405 if (unlikely(int_shadow || mask)) {
7406 static_call(kvm_x86_set_interrupt_shadow)(vcpu, mask);
7407 if (!mask)
7408 kvm_make_request(KVM_REQ_EVENT, vcpu);
7409 }
7410}
7411
7412static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
7413{
7414 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7415 if (ctxt->exception.vector == PF_VECTOR)
7416 return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
7417
7418 if (ctxt->exception.error_code_valid)
7419 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
7420 ctxt->exception.error_code);
7421 else
7422 kvm_queue_exception(vcpu, ctxt->exception.vector);
7423 return false;
7424}
7425
7426static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
7427{
7428 struct x86_emulate_ctxt *ctxt;
7429
7430 ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
7431 if (!ctxt) {
7432 pr_err("kvm: failed to allocate vcpu's emulator\n");
7433 return NULL;
7434 }
7435
7436 ctxt->vcpu = vcpu;
7437 ctxt->ops = &emulate_ops;
7438 vcpu->arch.emulate_ctxt = ctxt;
7439
7440 return ctxt;
7441}
7442
7443static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
7444{
7445 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7446 int cs_db, cs_l;
7447
7448 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
7449
7450 ctxt->gpa_available = false;
7451 ctxt->eflags = kvm_get_rflags(vcpu);
7452 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
7453
7454 ctxt->eip = kvm_rip_read(vcpu);
7455 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
7456 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
7457 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
7458 cs_db ? X86EMUL_MODE_PROT32 :
7459 X86EMUL_MODE_PROT16;
7460 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
7461 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
7462 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
7463
7464 ctxt->interruptibility = 0;
7465 ctxt->have_exception = false;
7466 ctxt->exception.vector = -1;
7467 ctxt->perm_ok = false;
7468
7469 init_decode_cache(ctxt);
7470 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
7471}
7472
7473void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
7474{
7475 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7476 int ret;
7477
7478 init_emulate_ctxt(vcpu);
7479
7480 ctxt->op_bytes = 2;
7481 ctxt->ad_bytes = 2;
7482 ctxt->_eip = ctxt->eip + inc_eip;
7483 ret = emulate_int_real(ctxt, irq);
7484
7485 if (ret != X86EMUL_CONTINUE) {
7486 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
7487 } else {
7488 ctxt->eip = ctxt->_eip;
7489 kvm_rip_write(vcpu, ctxt->eip);
7490 kvm_set_rflags(vcpu, ctxt->eflags);
7491 }
7492}
7493EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
7494
7495static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
7496{
7497 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7498 u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
7499 struct kvm_run *run = vcpu->run;
7500
7501 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7502 run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
7503 run->emulation_failure.ndata = 0;
7504 run->emulation_failure.flags = 0;
7505
7506 if (insn_size) {
7507 run->emulation_failure.ndata = 3;
7508 run->emulation_failure.flags |=
7509 KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
7510 run->emulation_failure.insn_size = insn_size;
7511 memset(run->emulation_failure.insn_bytes, 0x90,
7512 sizeof(run->emulation_failure.insn_bytes));
7513 memcpy(run->emulation_failure.insn_bytes,
7514 ctxt->fetch.data, insn_size);
7515 }
7516}
7517
7518static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
7519{
7520 struct kvm *kvm = vcpu->kvm;
7521
7522 ++vcpu->stat.insn_emulation_fail;
7523 trace_kvm_emulate_insn_failed(vcpu);
7524
7525 if (emulation_type & EMULTYPE_VMWARE_GP) {
7526 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
7527 return 1;
7528 }
7529
7530 if (kvm->arch.exit_on_emulation_error ||
7531 (emulation_type & EMULTYPE_SKIP)) {
7532 prepare_emulation_failure_exit(vcpu);
7533 return 0;
7534 }
7535
7536 kvm_queue_exception(vcpu, UD_VECTOR);
7537
7538 if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
7539 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7540 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7541 vcpu->run->internal.ndata = 0;
7542 return 0;
7543 }
7544
7545 return 1;
7546}
7547
7548static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
7549 bool write_fault_to_shadow_pgtable,
7550 int emulation_type)
7551{
7552 gpa_t gpa = cr2_or_gpa;
7553 kvm_pfn_t pfn;
7554
7555 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7556 return false;
7557
7558 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7559 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7560 return false;
7561
7562 if (!vcpu->arch.mmu->direct_map) {
7563
7564
7565
7566
7567 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
7568
7569
7570
7571
7572
7573 if (gpa == UNMAPPED_GVA)
7574 return true;
7575 }
7576
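	/*
	 * Probe the backing pfn.  If the GPA isn't backed by a usable memslot
	 * page, re-executing in the guest can't possibly succeed, so report
	 * failure and let emulation error out.
	 */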
7583 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
7584
7585
7586
7587
7588
7589 if (is_error_noslot_pfn(pfn))
7590 return false;
7591
7592 kvm_release_pfn_clean(pfn);
7593
7594
7595 if (vcpu->arch.mmu->direct_map) {
7596 unsigned int indirect_shadow_pages;
7597
7598 write_lock(&vcpu->kvm->mmu_lock);
7599 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
7600 write_unlock(&vcpu->kvm->mmu_lock);
7601
7602 if (indirect_shadow_pages)
7603 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7604
7605 return true;
7606 }
7607
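	/*
	 * With shadow paging, the write most likely hit a write-protected
	 * guest page table.  Unprotect the page and let the guest re-execute
	 * the instruction, unless the fault was on the page tables
	 * themselves, in which case unprotecting can't help.
	 */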
7613 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7614
7615
7616
7617
7618
7619
7620 return !write_fault_to_shadow_pgtable;
7621}
7622
7623static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
7624 gpa_t cr2_or_gpa, int emulation_type)
7625{
7626 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7627 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
7628
7629 last_retry_eip = vcpu->arch.last_retry_eip;
7630 last_retry_addr = vcpu->arch.last_retry_addr;
7631
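	/*
	 * If a page fault during emulation was caused by a write-protected
	 * shadowed guest page table, the page can be unprotected and the
	 * instruction retried directly in the guest.  Cache the last retried
	 * RIP/address so that hitting the same fault twice in a row breaks
	 * out instead of looping forever.
	 */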
7645 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
7646
7647 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7648 return false;
7649
7650 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7651 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7652 return false;
7653
7654 if (x86_page_table_writing_insn(ctxt))
7655 return false;
7656
7657 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
7658 return false;
7659
7660 vcpu->arch.last_retry_eip = ctxt->eip;
7661 vcpu->arch.last_retry_addr = cr2_or_gpa;
7662
7663 if (!vcpu->arch.mmu->direct_map)
7664 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
7665
7666 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7667
7668 return true;
7669}
7670
7671static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
7672static int complete_emulated_pio(struct kvm_vcpu *vcpu);
7673
7674static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
7675{
7676 trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
7677
7678 if (entering_smm) {
7679 vcpu->arch.hflags |= HF_SMM_MASK;
7680 } else {
7681 vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
7682
7683
7684 kvm_make_request(KVM_REQ_EVENT, vcpu);
7685
7686
7687
7688
7689
7690
7691 vcpu->arch.pdptrs_from_userspace = false;
7692 }
7693
7694 kvm_mmu_reset_context(vcpu);
7695}
7696
7697static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
7698 unsigned long *db)
7699{
7700 u32 dr6 = 0;
7701 int i;
7702 u32 enable, rwlen;
7703
7704 enable = dr7;
7705 rwlen = dr7 >> 16;
7706 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
7707 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
7708 dr6 |= (1 << i);
7709 return dr6;
7710}
7711
7712static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
7713{
7714 struct kvm_run *kvm_run = vcpu->run;
7715
7716 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
7717 kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW;
7718 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
7719 kvm_run->debug.arch.exception = DB_VECTOR;
7720 kvm_run->exit_reason = KVM_EXIT_DEBUG;
7721 return 0;
7722 }
7723 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
7724 return 1;
7725}
7726
7727int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
7728{
7729 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
7730 int r;
7731
7732 r = static_call(kvm_x86_skip_emulated_instruction)(vcpu);
7733 if (unlikely(!r))
7734 return 0;
7735
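	/*
	 * rflags is the value before the skip.  If single-stepping (TF) was
	 * in effect for the skipped instruction, synthesize the #DB or debug
	 * exit it would have produced.
	 */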
7744 if (unlikely(rflags & X86_EFLAGS_TF))
7745 r = kvm_vcpu_do_singlestep(vcpu);
7746 return r;
7747}
7748EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
7749
7750static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
7751{
7752 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
7753 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
7754 struct kvm_run *kvm_run = vcpu->run;
7755 unsigned long eip = kvm_get_linear_rip(vcpu);
7756 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
7757 vcpu->arch.guest_debug_dr7,
7758 vcpu->arch.eff_db);
7759
7760 if (dr6 != 0) {
7761 kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
7762 kvm_run->debug.arch.pc = eip;
7763 kvm_run->debug.arch.exception = DB_VECTOR;
7764 kvm_run->exit_reason = KVM_EXIT_DEBUG;
7765 *r = 0;
7766 return true;
7767 }
7768 }
7769
7770 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
7771 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
7772 unsigned long eip = kvm_get_linear_rip(vcpu);
7773 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
7774 vcpu->arch.dr7,
7775 vcpu->arch.db);
7776
7777 if (dr6 != 0) {
7778 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
7779 *r = 1;
7780 return true;
7781 }
7782 }
7783
7784 return false;
7785}
7786
7787static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
7788{
7789 switch (ctxt->opcode_len) {
7790 case 1:
7791 switch (ctxt->b) {
7792 case 0xe4:
7793 case 0xe5:
7794 case 0xec:
7795 case 0xed:
7796 case 0xe6:
7797 case 0xe7:
7798 case 0xee:
7799 case 0xef:
7800 case 0x6c:
7801 case 0x6d:
7802 case 0x6e:
7803 case 0x6f:
7804 return true;
7805 }
7806 break;
7807 case 2:
7808 switch (ctxt->b) {
7809 case 0x33:
7810 return true;
7811 }
7812 break;
7813 }
7814
7815 return false;
7816}
7817
7818
7819
7820
7821int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
7822 void *insn, int insn_len)
7823{
7824 int r = EMULATION_OK;
7825 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7826
7827 init_emulate_ctxt(vcpu);
7828
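	/*
	 * Check instruction breakpoints before decoding; if one fires, the
	 * same instruction is re-executed on re-entry since
	 * complete_userspace_io is not set.
	 */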
7834 if (!(emulation_type & EMULTYPE_SKIP) &&
7835 kvm_vcpu_check_breakpoint(vcpu, &r))
7836 return r;
7837
7838 r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
7839
7840 trace_kvm_emulate_insn_start(vcpu);
7841 ++vcpu->stat.insn_emulation;
7842
7843 return r;
7844}
7845EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
7846
7847int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
7848 int emulation_type, void *insn, int insn_len)
7849{
7850 int r;
7851 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7852 bool writeback = true;
7853 bool write_fault_to_spt;
7854
7855 if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
7856 return 1;
7857
7858 vcpu->arch.l1tf_flush_l1d = true;
7859
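	/*
	 * Snapshot and clear write_fault_to_shadow_pgtable so a stale value
	 * is never reused by a later emulation.
	 */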
7864 write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
7865 vcpu->arch.write_fault_to_shadow_pgtable = false;
7866
7867 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
7868 kvm_clear_exception_queue(vcpu);
7869
7870 r = x86_decode_emulated_instruction(vcpu, emulation_type,
7871 insn, insn_len);
7872 if (r != EMULATION_OK) {
7873 if ((emulation_type & EMULTYPE_TRAP_UD) ||
7874 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
7875 kvm_queue_exception(vcpu, UD_VECTOR);
7876 return 1;
7877 }
7878 if (reexecute_instruction(vcpu, cr2_or_gpa,
7879 write_fault_to_spt,
7880 emulation_type))
7881 return 1;
7882 if (ctxt->have_exception) {
7883
7884
7885
7886
7887 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
7888 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
7889 inject_emulated_exception(vcpu);
7890 return 1;
7891 }
7892 return handle_emulation_failure(vcpu, emulation_type);
7893 }
7894 }
7895
7896 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
7897 !is_vmware_backdoor_opcode(ctxt)) {
7898 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
7899 return 1;
7900 }
7901
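	/*
	 * EMULTYPE_SKIP is only used by vendor code to skip an instruction;
	 * the caller is responsible for interruptibility state and for
	 * injecting any single-step #DB.  Clearing RF mirrors what a real
	 * instruction skip would do.
	 */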
7907 if (emulation_type & EMULTYPE_SKIP) {
7908 kvm_rip_write(vcpu, ctxt->_eip);
7909 if (ctxt->eflags & X86_EFLAGS_RF)
7910 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
7911 return 1;
7912 }
7913
7914 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
7915 return 1;
7916
7917
7918
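	/*
	 * Register state may have been modified behind the emulator's back
	 * (e.g. by the VMware backdoor during PIO), so drop the cached GPRs
	 * and reread them from the vCPU.
	 */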
7919 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
7920 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
7921 emulator_invalidate_register_cache(ctxt);
7922 }
7923
7924restart:
7925 if (emulation_type & EMULTYPE_PF) {
7926
7927 ctxt->exception.address = cr2_or_gpa;
7928
7929
7930 if (vcpu->arch.mmu->direct_map) {
7931 ctxt->gpa_available = true;
7932 ctxt->gpa_val = cr2_or_gpa;
7933 }
7934 } else {
7935
7936 ctxt->exception.address = 0;
7937 }
7938
7939 r = x86_emulate_insn(ctxt);
7940
7941 if (r == EMULATION_INTERCEPTED)
7942 return 1;
7943
7944 if (r == EMULATION_FAILED) {
7945 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
7946 emulation_type))
7947 return 1;
7948
7949 return handle_emulation_failure(vcpu, emulation_type);
7950 }
7951
7952 if (ctxt->have_exception) {
7953 r = 1;
7954 if (inject_emulated_exception(vcpu))
7955 return r;
7956 } else if (vcpu->arch.pio.count) {
7957 if (!vcpu->arch.pio.in) {
7958
7959 vcpu->arch.pio.count = 0;
7960 } else {
7961 writeback = false;
7962 vcpu->arch.complete_userspace_io = complete_emulated_pio;
7963 }
7964 r = 0;
7965 } else if (vcpu->mmio_needed) {
7966 ++vcpu->stat.mmio_exits;
7967
7968 if (!vcpu->mmio_is_write)
7969 writeback = false;
7970 r = 0;
7971 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
7972 } else if (r == EMULATION_RESTART)
7973 goto restart;
7974 else
7975 r = 1;
7976
7977 if (writeback) {
7978 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
7979 toggle_interruptibility(vcpu, ctxt->interruptibility);
7980 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7981 if (!ctxt->have_exception ||
7982 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
7983 kvm_rip_write(vcpu, ctxt->eip);
7984 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
7985 r = kvm_vcpu_do_singlestep(vcpu);
7986 if (kvm_x86_ops.update_emulated_instruction)
7987 static_call(kvm_x86_update_emulated_instruction)(vcpu);
7988 __kvm_set_rflags(vcpu, ctxt->eflags);
7989 }
7990
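	/*
	 * If emulation set EFLAGS.IF, re-evaluate pending events.  STI keeps
	 * interrupts shadowed for one more instruction, so the request is
	 * largely redundant there, but POPF has no interrupt shadow.
	 */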
7997 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
7998 kvm_make_request(KVM_REQ_EVENT, vcpu);
7999 } else
8000 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
8001
8002 return r;
8003}
8004
8005int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
8006{
8007 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
8008}
8009EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
8010
8011int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
8012 void *insn, int insn_len)
8013{
8014 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
8015}
8016EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
8017
8018static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
8019{
8020 vcpu->arch.pio.count = 0;
8021 return 1;
8022}
8023
8024static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
8025{
8026 vcpu->arch.pio.count = 0;
8027
8028 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
8029 return 1;
8030
8031 return kvm_skip_emulated_instruction(vcpu);
8032}
8033
8034static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
8035 unsigned short port)
8036{
8037 unsigned long val = kvm_rax_read(vcpu);
8038 int ret = emulator_pio_out(vcpu, size, port, &val, 1);
8039
8040 if (ret)
8041 return ret;
8042
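	/*
	 * Quirk: some userspace relies on %rip having already been advanced
	 * when an OUT to port 0x7e exits, so skip the instruction before
	 * exiting if KVM_X86_QUIRK_OUT_7E_INC_RIP is enabled.
	 */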
8047 if (port == 0x7e &&
8048 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
8049 vcpu->arch.complete_userspace_io =
8050 complete_fast_pio_out_port_0x7e;
8051 kvm_skip_emulated_instruction(vcpu);
8052 } else {
8053 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8054 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
8055 }
8056 return 0;
8057}
8058
8059static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
8060{
8061 unsigned long val;
8062
8063
8064 BUG_ON(vcpu->arch.pio.count != 1);
8065
8066 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
8067 vcpu->arch.pio.count = 0;
8068 return 1;
8069 }
8070
8071
8072 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
8073
8074
8075
8076
8077
8078 emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
8079 kvm_rax_write(vcpu, val);
8080
8081 return kvm_skip_emulated_instruction(vcpu);
8082}
8083
8084static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
8085 unsigned short port)
8086{
8087 unsigned long val;
8088 int ret;
8089
8090
8091 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
8092
8093 ret = emulator_pio_in(vcpu, size, port, &val, 1);
8094 if (ret) {
8095 kvm_rax_write(vcpu, val);
8096 return ret;
8097 }
8098
8099 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8100 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
8101
8102 return 0;
8103}
8104
8105int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
8106{
8107 int ret;
8108
8109 if (in)
8110 ret = kvm_fast_pio_in(vcpu, size, port);
8111 else
8112 ret = kvm_fast_pio_out(vcpu, size, port);
8113 return ret && kvm_skip_emulated_instruction(vcpu);
8114}
8115EXPORT_SYMBOL_GPL(kvm_fast_pio);
8116
8117static int kvmclock_cpu_down_prep(unsigned int cpu)
8118{
8119 __this_cpu_write(cpu_tsc_khz, 0);
8120 return 0;
8121}
8122
8123static void tsc_khz_changed(void *data)
8124{
8125 struct cpufreq_freqs *freq = data;
8126 unsigned long khz = 0;
8127
8128 if (data)
8129 khz = freq->new;
8130 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8131 khz = cpufreq_quick_get(raw_smp_processor_id());
8132 if (!khz)
8133 khz = tsc_khz;
8134 __this_cpu_write(cpu_tsc_khz, khz);
8135}
8136
8137#ifdef CONFIG_X86_64
8138static void kvm_hyperv_tsc_notifier(void)
8139{
8140 struct kvm *kvm;
8141 struct kvm_vcpu *vcpu;
8142 int cpu;
8143 unsigned long flags;
8144
8145 mutex_lock(&kvm_lock);
8146 list_for_each_entry(kvm, &vm_list, vm_list)
8147 kvm_make_mclock_inprogress_request(kvm);
8148
8149 hyperv_stop_tsc_emulation();
8150
8151
8152 for_each_present_cpu(cpu)
8153 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
8154 kvm_max_guest_tsc_khz = tsc_khz;
8155
8156 list_for_each_entry(kvm, &vm_list, vm_list) {
8157 struct kvm_arch *ka = &kvm->arch;
8158
8159 raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
8160 pvclock_update_vm_gtod_copy(kvm);
8161 raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
8162
8163 kvm_for_each_vcpu(cpu, vcpu, kvm)
8164 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8165
8166 kvm_for_each_vcpu(cpu, vcpu, kvm)
8167 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
8168 }
8169 mutex_unlock(&kvm_lock);
8170}
8171#endif
8172
8173static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
8174{
8175 struct kvm *kvm;
8176 struct kvm_vcpu *vcpu;
8177 int i, send_ipi = 0;
8178
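	/*
	 * The per-CPU cpu_tsc_khz value must be updated on the CPU it belongs
	 * to, and before any vCPU running there recomputes its kvmclock
	 * parameters, so do the update via a synchronous IPI and then flag
	 * affected vCPUs with KVM_REQ_CLOCK_UPDATE below.
	 */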
8218 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
8219
8220 mutex_lock(&kvm_lock);
8221 list_for_each_entry(kvm, &vm_list, vm_list) {
8222 kvm_for_each_vcpu(i, vcpu, kvm) {
8223 if (vcpu->cpu != cpu)
8224 continue;
8225 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8226 if (vcpu->cpu != raw_smp_processor_id())
8227 send_ipi = 1;
8228 }
8229 }
8230 mutex_unlock(&kvm_lock);
8231
8232 if (freq->old < freq->new && send_ipi) {
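		/*
		 * The TSC frequency went up.  A vCPU currently running on this
		 * CPU could otherwise compute kvmclock values with the old
		 * frequency and see time appear to go backwards, so kick it
		 * out of guest mode; everything is recomputed on the next
		 * entry.
		 */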
8245 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
8246 }
8247}
8248
8249static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
8250 void *data)
8251{
8252 struct cpufreq_freqs *freq = data;
8253 int cpu;
8254
8255 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
8256 return 0;
8257 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
8258 return 0;
8259
8260 for_each_cpu(cpu, freq->policy->cpus)
8261 __kvmclock_cpufreq_notifier(freq, cpu);
8262
8263 return 0;
8264}
8265
8266static struct notifier_block kvmclock_cpufreq_notifier_block = {
8267 .notifier_call = kvmclock_cpufreq_notifier
8268};
8269
8270static int kvmclock_cpu_online(unsigned int cpu)
8271{
8272 tsc_khz_changed(NULL);
8273 return 0;
8274}
8275
8276static void kvm_timer_init(void)
8277{
8278 max_tsc_khz = tsc_khz;
8279
8280 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
8281#ifdef CONFIG_CPU_FREQ
8282 struct cpufreq_policy *policy;
8283 int cpu;
8284
8285 cpu = get_cpu();
8286 policy = cpufreq_cpu_get(cpu);
8287 if (policy) {
8288 if (policy->cpuinfo.max_freq)
8289 max_tsc_khz = policy->cpuinfo.max_freq;
8290 cpufreq_cpu_put(policy);
8291 }
8292 put_cpu();
8293#endif
8294 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
8295 CPUFREQ_TRANSITION_NOTIFIER);
8296 }
8297
8298 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
8299 kvmclock_cpu_online, kvmclock_cpu_down_prep);
8300}
8301
8302DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
8303EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
8304
8305int kvm_is_in_guest(void)
8306{
8307 return __this_cpu_read(current_vcpu) != NULL;
8308}
8309
8310static int kvm_is_user_mode(void)
8311{
8312 int user_mode = 3;
8313
8314 if (__this_cpu_read(current_vcpu))
8315 user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
8316
8317 return user_mode != 0;
8318}
8319
8320static unsigned long kvm_get_guest_ip(void)
8321{
8322 unsigned long ip = 0;
8323
8324 if (__this_cpu_read(current_vcpu))
8325 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
8326
8327 return ip;
8328}
8329
8330static void kvm_handle_intel_pt_intr(void)
8331{
8332 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
8333
8334 kvm_make_request(KVM_REQ_PMI, vcpu);
8335 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
8336 (unsigned long *)&vcpu->arch.pmu.global_status);
8337}
8338
8339static struct perf_guest_info_callbacks kvm_guest_cbs = {
8340 .is_in_guest = kvm_is_in_guest,
8341 .is_user_mode = kvm_is_user_mode,
8342 .get_guest_ip = kvm_get_guest_ip,
8343 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
8344};
8345
8346#ifdef CONFIG_X86_64
8347static void pvclock_gtod_update_fn(struct work_struct *work)
8348{
8349 struct kvm *kvm;
8350
8351 struct kvm_vcpu *vcpu;
8352 int i;
8353
8354 mutex_lock(&kvm_lock);
8355 list_for_each_entry(kvm, &vm_list, vm_list)
8356 kvm_for_each_vcpu(i, vcpu, kvm)
8357 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
8358 atomic_set(&kvm_guest_has_master_clock, 0);
8359 mutex_unlock(&kvm_lock);
8360}
8361
8362static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
8363
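/*
 * Indirection so that queue_work() isn't called from within the
 * timekeeper's seqcount write-side critical section, which could deadlock
 * against time accessors invoked under workqueue-related locks.
 */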
8369static void pvclock_irq_work_fn(struct irq_work *w)
8370{
8371 queue_work(system_long_wq, &pvclock_gtod_work);
8372}
8373
8374static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
8375
8376
8377
8378
8379static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
8380 void *priv)
8381{
8382 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
8383 struct timekeeper *tk = priv;
8384
8385 update_pvclock_gtod(tk);
8386
8387
8388
8389
8390
8391
8392 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
8393 atomic_read(&kvm_guest_has_master_clock) != 0)
8394 irq_work_queue(&pvclock_irq_work);
8395 return 0;
8396}
8397
8398static struct notifier_block pvclock_gtod_notifier = {
8399 .notifier_call = pvclock_gtod_notify,
8400};
8401#endif
8402
8403int kvm_arch_init(void *opaque)
8404{
8405 struct kvm_x86_init_ops *ops = opaque;
8406 int r;
8407
8408 if (kvm_x86_ops.hardware_enable) {
8409 printk(KERN_ERR "kvm: already loaded the other module\n");
8410 r = -EEXIST;
8411 goto out;
8412 }
8413
8414 if (!ops->cpu_has_kvm_support()) {
8415 pr_err_ratelimited("kvm: no hardware support\n");
8416 r = -EOPNOTSUPP;
8417 goto out;
8418 }
8419 if (ops->disabled_by_bios()) {
8420 pr_err_ratelimited("kvm: disabled by bios\n");
8421 r = -EOPNOTSUPP;
8422 goto out;
8423 }
8424
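	/*
	 * KVM assumes FPU and FXSAVE/FXRSTOR support; e.g. KVM_GET_FPU
	 * exposes the vCPU FPU state as an fxregs_state struct.
	 */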
8430 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
8431 printk(KERN_ERR "kvm: inadequate fpu\n");
8432 r = -EOPNOTSUPP;
8433 goto out;
8434 }
8435
8436 r = -ENOMEM;
8437 x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
8438 __alignof__(struct fpu), SLAB_ACCOUNT,
8439 NULL);
8440 if (!x86_fpu_cache) {
8441 printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
8442 goto out;
8443 }
8444
8445 x86_emulator_cache = kvm_alloc_emulator_cache();
8446 if (!x86_emulator_cache) {
8447 pr_err("kvm: failed to allocate cache for x86 emulator\n");
8448 goto out_free_x86_fpu_cache;
8449 }
8450
8451 user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
8452 if (!user_return_msrs) {
8453 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
8454 goto out_free_x86_emulator_cache;
8455 }
8456 kvm_nr_uret_msrs = 0;
8457
8458 r = kvm_mmu_module_init();
8459 if (r)
8460 goto out_free_percpu;
8461
8462 kvm_timer_init();
8463
8464 perf_register_guest_info_callbacks(&kvm_guest_cbs);
8465
8466 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
8467 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
8468 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
8469 }
8470
8471 if (pi_inject_timer == -1)
8472 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
8473#ifdef CONFIG_X86_64
8474 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
8475
8476 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
8477 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
8478#endif
8479
8480 return 0;
8481
8482out_free_percpu:
8483 free_percpu(user_return_msrs);
8484out_free_x86_emulator_cache:
8485 kmem_cache_destroy(x86_emulator_cache);
8486out_free_x86_fpu_cache:
8487 kmem_cache_destroy(x86_fpu_cache);
8488out:
8489 return r;
8490}
8491
8492void kvm_arch_exit(void)
8493{
8494#ifdef CONFIG_X86_64
8495 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
8496 clear_hv_tscchange_cb();
8497#endif
8498 kvm_lapic_exit();
8499 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
8500
8501 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8502 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
8503 CPUFREQ_TRANSITION_NOTIFIER);
8504 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
8505#ifdef CONFIG_X86_64
8506 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
8507 irq_work_sync(&pvclock_irq_work);
8508 cancel_work_sync(&pvclock_gtod_work);
8509#endif
8510 kvm_x86_ops.hardware_enable = NULL;
8511 kvm_mmu_module_exit();
8512 free_percpu(user_return_msrs);
8513 kmem_cache_destroy(x86_emulator_cache);
8514 kmem_cache_destroy(x86_fpu_cache);
8515#ifdef CONFIG_KVM_XEN
8516 static_key_deferred_flush(&kvm_xen_enabled);
8517 WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
8518#endif
8519}
8520
8521static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
8522{
8523 ++vcpu->stat.halt_exits;
8524 if (lapic_in_kernel(vcpu)) {
8525 vcpu->arch.mp_state = state;
8526 return 1;
8527 } else {
8528 vcpu->run->exit_reason = reason;
8529 return 0;
8530 }
8531}
8532
8533int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
8534{
8535 return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
8536}
8537EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
8538
8539int kvm_emulate_halt(struct kvm_vcpu *vcpu)
8540{
8541 int ret = kvm_skip_emulated_instruction(vcpu);
8542
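	/*
	 * Caveat: if the skip above raised a single-step KVM_EXIT_DEBUG,
	 * kvm_vcpu_halt() may overwrite the exit reason and the debug exit
	 * could be lost.
	 */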
8546 return kvm_vcpu_halt(vcpu) && ret;
8547}
8548EXPORT_SYMBOL_GPL(kvm_emulate_halt);
8549
8550int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
8551{
8552 int ret = kvm_skip_emulated_instruction(vcpu);
8553
8554 return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
8555}
8556EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
8557
8558#ifdef CONFIG_X86_64
8559static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
8560 unsigned long clock_type)
8561{
8562 struct kvm_clock_pairing clock_pairing;
8563 struct timespec64 ts;
8564 u64 cycle;
8565 int ret;
8566
8567 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
8568 return -KVM_EOPNOTSUPP;
8569
8570 if (!kvm_get_walltime_and_clockread(&ts, &cycle))
8571 return -KVM_EOPNOTSUPP;
8572
8573 clock_pairing.sec = ts.tv_sec;
8574 clock_pairing.nsec = ts.tv_nsec;
8575 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
8576 clock_pairing.flags = 0;
8577 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
8578
8579 ret = 0;
8580 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
8581 sizeof(struct kvm_clock_pairing)))
8582 ret = -KVM_EFAULT;
8583
8584 return ret;
8585}
8586#endif
8587
8588
8589
8590
8591
8592
8593static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
8594{
8595 struct kvm_lapic_irq lapic_irq;
8596
8597 lapic_irq.shorthand = APIC_DEST_NOSHORT;
8598 lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
8599 lapic_irq.level = 0;
8600 lapic_irq.dest_id = apicid;
8601 lapic_irq.msi_redir_hint = false;
8602
8603 lapic_irq.delivery_mode = APIC_DM_REMRD;
8604 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
8605}
8606
8607bool kvm_apicv_activated(struct kvm *kvm)
8608{
8609 return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
8610}
8611EXPORT_SYMBOL_GPL(kvm_apicv_activated);
8612
8613static void kvm_apicv_init(struct kvm *kvm)
8614{
8615 mutex_init(&kvm->arch.apicv_update_lock);
8616
8617 if (enable_apicv)
8618 clear_bit(APICV_INHIBIT_REASON_DISABLE,
8619 &kvm->arch.apicv_inhibit_reasons);
8620 else
8621 set_bit(APICV_INHIBIT_REASON_DISABLE,
8622 &kvm->arch.apicv_inhibit_reasons);
8623}
8624
8625static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
8626{
8627 struct kvm_vcpu *target = NULL;
8628 struct kvm_apic_map *map;
8629
8630 vcpu->stat.directed_yield_attempted++;
8631
8632 if (single_task_running())
8633 goto no_yield;
8634
8635 rcu_read_lock();
8636 map = rcu_dereference(vcpu->kvm->arch.apic_map);
8637
8638 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
8639 target = map->phys_map[dest_id]->vcpu;
8640
8641 rcu_read_unlock();
8642
8643 if (!target || !READ_ONCE(target->ready))
8644 goto no_yield;
8645
8646
8647 if (vcpu == target)
8648 goto no_yield;
8649
8650 if (kvm_vcpu_yield_to(target) <= 0)
8651 goto no_yield;
8652
8653 vcpu->stat.directed_yield_successful++;
8654
8655no_yield:
8656 return;
8657}
8658
8659static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
8660{
8661 u64 ret = vcpu->run->hypercall.ret;
8662
8663 if (!is_64_bit_mode(vcpu))
8664 ret = (u32)ret;
8665 kvm_rax_write(vcpu, ret);
8666 ++vcpu->stat.hypercalls;
8667 return kvm_skip_emulated_instruction(vcpu);
8668}
8669
8670int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
8671{
8672 unsigned long nr, a0, a1, a2, a3, ret;
8673 int op_64_bit;
8674
8675 if (kvm_xen_hypercall_enabled(vcpu->kvm))
8676 return kvm_xen_hypercall(vcpu);
8677
8678 if (kvm_hv_hypercall_enabled(vcpu))
8679 return kvm_hv_hypercall(vcpu);
8680
8681 nr = kvm_rax_read(vcpu);
8682 a0 = kvm_rbx_read(vcpu);
8683 a1 = kvm_rcx_read(vcpu);
8684 a2 = kvm_rdx_read(vcpu);
8685 a3 = kvm_rsi_read(vcpu);
8686
8687 trace_kvm_hypercall(nr, a0, a1, a2, a3);
8688
8689 op_64_bit = is_64_bit_mode(vcpu);
8690 if (!op_64_bit) {
8691 nr &= 0xFFFFFFFF;
8692 a0 &= 0xFFFFFFFF;
8693 a1 &= 0xFFFFFFFF;
8694 a2 &= 0xFFFFFFFF;
8695 a3 &= 0xFFFFFFFF;
8696 }
8697
8698 if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
8699 ret = -KVM_EPERM;
8700 goto out;
8701 }
8702
8703 ret = -KVM_ENOSYS;
8704
8705 switch (nr) {
8706 case KVM_HC_VAPIC_POLL_IRQ:
8707 ret = 0;
8708 break;
8709 case KVM_HC_KICK_CPU:
8710 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
8711 break;
8712
8713 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
8714 kvm_sched_yield(vcpu, a1);
8715 ret = 0;
8716 break;
8717#ifdef CONFIG_X86_64
8718 case KVM_HC_CLOCK_PAIRING:
8719 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
8720 break;
8721#endif
8722 case KVM_HC_SEND_IPI:
8723 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
8724 break;
8725
8726 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
8727 break;
8728 case KVM_HC_SCHED_YIELD:
8729 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
8730 break;
8731
8732 kvm_sched_yield(vcpu, a0);
8733 ret = 0;
8734 break;
8735 case KVM_HC_MAP_GPA_RANGE: {
8736 u64 gpa = a0, npages = a1, attrs = a2;
8737
8738 ret = -KVM_ENOSYS;
8739 if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
8740 break;
8741
8742 if (!PAGE_ALIGNED(gpa) || !npages ||
8743 gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
8744 ret = -KVM_EINVAL;
8745 break;
8746 }
8747
8748 vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
8749 vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
8750 vcpu->run->hypercall.args[0] = gpa;
8751 vcpu->run->hypercall.args[1] = npages;
8752 vcpu->run->hypercall.args[2] = attrs;
8753 vcpu->run->hypercall.longmode = op_64_bit;
8754 vcpu->arch.complete_userspace_io = complete_hypercall_exit;
8755 return 0;
8756 }
8757 default:
8758 ret = -KVM_ENOSYS;
8759 break;
8760 }
8761out:
8762 if (!op_64_bit)
8763 ret = (u32)ret;
8764 kvm_rax_write(vcpu, ret);
8765
8766 ++vcpu->stat.hypercalls;
8767 return kvm_skip_emulated_instruction(vcpu);
8768}
8769EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
8770
8771static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
8772{
8773 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
8774 char instruction[3];
8775 unsigned long rip = kvm_rip_read(vcpu);
8776
8777 static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
8778
8779 return emulator_write_emulated(ctxt, rip, instruction, 3,
8780 &ctxt->exception);
8781}
8782
8783static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
8784{
8785 return vcpu->run->request_interrupt_window &&
8786 likely(!pic_in_kernel(vcpu->kvm));
8787}
8788
8789static void post_kvm_run_save(struct kvm_vcpu *vcpu)
8790{
8791 struct kvm_run *kvm_run = vcpu->run;
8792
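	/*
	 * if_flag can't be derived for guests with protected state (e.g.
	 * SEV-ES), so report it as clear; userspace should use
	 * ready_for_interrupt_injection instead.
	 */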
8798 kvm_run->if_flag = !vcpu->arch.guest_state_protected
8799 && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
8800
8801 kvm_run->cr8 = kvm_get_cr8(vcpu);
8802 kvm_run->apic_base = kvm_get_apic_base(vcpu);
8803
	/*
	 * Checking readiness for interrupt injection can touch state that is
	 * protected by kvm->srcu (e.g. Xen event channels), so hold the SRCU
	 * read lock around it.
	 */
8809 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8810 kvm_run->ready_for_interrupt_injection =
8811 pic_in_kernel(vcpu->kvm) ||
8812 kvm_vcpu_ready_for_interrupt_injection(vcpu);
8813 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8814
8815 if (is_smm(vcpu))
8816 kvm_run->flags |= KVM_RUN_X86_SMM;
8817}
8818
8819static void update_cr8_intercept(struct kvm_vcpu *vcpu)
8820{
8821 int max_irr, tpr;
8822
8823 if (!kvm_x86_ops.update_cr8_intercept)
8824 return;
8825
8826 if (!lapic_in_kernel(vcpu))
8827 return;
8828
8829 if (vcpu->arch.apicv_active)
8830 return;
8831
8832 if (!vcpu->arch.apic->vapic_addr)
8833 max_irr = kvm_lapic_find_highest_irr(vcpu);
8834 else
8835 max_irr = -1;
8836
8837 if (max_irr != -1)
8838 max_irr >>= 4;
8839
8840 tpr = kvm_lapic_get_cr8(vcpu);
8841
8842 static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
8843}
8844
8845
8846int kvm_check_nested_events(struct kvm_vcpu *vcpu)
8847{
8848 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
8849 kvm_x86_ops.nested_ops->triple_fault(vcpu);
8850 return 1;
8851 }
8852
8853 return kvm_x86_ops.nested_ops->check_events(vcpu);
8854}
8855
8856static void kvm_inject_exception(struct kvm_vcpu *vcpu)
8857{
8858 if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
8859 vcpu->arch.exception.error_code = false;
8860 static_call(kvm_x86_queue_exception)(vcpu);
8861}
8862
8863static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
8864{
8865 int r;
8866 bool can_inject = true;
8867
	/* First, re-inject any event that was injected but not yet delivered. */
8870 if (vcpu->arch.exception.injected) {
8871 kvm_inject_exception(vcpu);
8872 can_inject = false;
8873 }
	/*
	 * Don't re-inject an NMI or interrupt if an exception is pending;
	 * the exception is delivered first, below, and takes priority over
	 * any new NMI or IRQ injection.
	 */
8888 else if (!vcpu->arch.exception.pending) {
8889 if (vcpu->arch.nmi_injected) {
8890 static_call(kvm_x86_set_nmi)(vcpu);
8891 can_inject = false;
8892 } else if (vcpu->arch.interrupt.injected) {
8893 static_call(kvm_x86_set_irq)(vcpu);
8894 can_inject = false;
8895 }
8896 }
8897
8898 WARN_ON_ONCE(vcpu->arch.exception.injected &&
8899 vcpu->arch.exception.pending);
8900
	/*
	 * When running a nested guest, give L1 the chance to intercept
	 * pending events; this may result in a nested VM-exit rather than
	 * an injection into L2.
	 */
8907 if (is_guest_mode(vcpu)) {
8908 r = kvm_check_nested_events(vcpu);
8909 if (r < 0)
8910 goto out;
8911 }
8912
8913
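	/* Deliver a newly pending exception, if any. */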
8914 if (vcpu->arch.exception.pending) {
8915 trace_kvm_inj_exception(vcpu->arch.exception.nr,
8916 vcpu->arch.exception.has_error_code,
8917 vcpu->arch.exception.error_code);
8918
8919 vcpu->arch.exception.pending = false;
8920 vcpu->arch.exception.injected = true;
8921
8922 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
8923 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
8924 X86_EFLAGS_RF);
8925
8926 if (vcpu->arch.exception.nr == DB_VECTOR) {
8927 kvm_deliver_exception_payload(vcpu);
8928 if (vcpu->arch.dr7 & DR7_GD) {
8929 vcpu->arch.dr7 &= ~DR7_GD;
8930 kvm_update_dr7(vcpu);
8931 }
8932 }
8933
8934 kvm_inject_exception(vcpu);
8935 can_inject = false;
8936 }
8937
8938
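	/* Don't inject new events if userspace requested KVM_GUESTDBG_BLOCKIRQ. */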
8939 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
8940 return 0;
8941
	/*
	 * Try to inject pending SMIs, NMIs and IRQs, in that priority order.
	 * The vendor *_allowed() hooks return a negative value when injection
	 * is temporarily blocked; that is converted into an immediate-exit
	 * request at 'out' so injection can be retried as soon as possible.
	 */
8953 if (vcpu->arch.smi_pending) {
8954 r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
8955 if (r < 0)
8956 goto out;
8957 if (r) {
8958 vcpu->arch.smi_pending = false;
8959 ++vcpu->arch.smi_count;
8960 enter_smm(vcpu);
8961 can_inject = false;
8962 } else
8963 static_call(kvm_x86_enable_smi_window)(vcpu);
8964 }
8965
8966 if (vcpu->arch.nmi_pending) {
8967 r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
8968 if (r < 0)
8969 goto out;
8970 if (r) {
8971 --vcpu->arch.nmi_pending;
8972 vcpu->arch.nmi_injected = true;
8973 static_call(kvm_x86_set_nmi)(vcpu);
8974 can_inject = false;
8975 WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
8976 }
8977 if (vcpu->arch.nmi_pending)
8978 static_call(kvm_x86_enable_nmi_window)(vcpu);
8979 }
8980
8981 if (kvm_cpu_has_injectable_intr(vcpu)) {
8982 r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
8983 if (r < 0)
8984 goto out;
8985 if (r) {
8986 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
8987 static_call(kvm_x86_set_irq)(vcpu);
8988 WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
8989 }
8990 if (kvm_cpu_has_injectable_intr(vcpu))
8991 static_call(kvm_x86_enable_irq_window)(vcpu);
8992 }
8993
8994 if (is_guest_mode(vcpu) &&
8995 kvm_x86_ops.nested_ops->hv_timer_pending &&
8996 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
8997 *req_immediate_exit = true;
8998
8999 WARN_ON(vcpu->arch.exception.pending);
9000 return 0;
9001
9002out:
9003 if (r == -EBUSY) {
9004 *req_immediate_exit = true;
9005 r = 0;
9006 }
9007 return r;
9008}
9009
9010static void process_nmi(struct kvm_vcpu *vcpu)
9011{
9012 unsigned limit = 2;
9013
	/*
	 * x86 can latch at most one NMI while another is being serviced, so
	 * allow at most two pending NMIs, or just one if an NMI is currently
	 * masked or has already been injected.
	 */
9019 if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
9020 limit = 1;
9021
9022 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
9023 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
9024 kvm_make_request(KVM_REQ_EVENT, vcpu);
9025}
9026
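/* Pack a kvm_segment's attributes into the SMRAM state-save flag layout. */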
9027static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
9028{
9029 u32 flags = 0;
9030 flags |= seg->g << 23;
9031 flags |= seg->db << 22;
9032 flags |= seg->l << 21;
9033 flags |= seg->avl << 20;
9034 flags |= seg->present << 15;
9035 flags |= seg->dpl << 13;
9036 flags |= seg->s << 12;
9037 flags |= seg->type << 8;
9038 return flags;
9039}
9040
9041static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
9042{
9043 struct kvm_segment seg;
9044 int offset;
9045
9046 kvm_get_segment(vcpu, &seg, n);
9047 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
9048
9049 if (n < 3)
9050 offset = 0x7f84 + n * 12;
9051 else
9052 offset = 0x7f2c + (n - 3) * 12;
9053
9054 put_smstate(u32, buf, offset + 8, seg.base);
9055 put_smstate(u32, buf, offset + 4, seg.limit);
9056 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
9057}
9058
9059#ifdef CONFIG_X86_64
9060static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
9061{
9062 struct kvm_segment seg;
9063 int offset;
9064 u16 flags;
9065
9066 kvm_get_segment(vcpu, &seg, n);
9067 offset = 0x7e00 + n * 16;
9068
9069 flags = enter_smm_get_segment_flags(&seg) >> 8;
9070 put_smstate(u16, buf, offset, seg.selector);
9071 put_smstate(u16, buf, offset + 2, flags);
9072 put_smstate(u32, buf, offset + 4, seg.limit);
9073 put_smstate(u64, buf, offset + 8, seg.base);
9074}
9075#endif
9076
9077static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
9078{
9079 struct desc_ptr dt;
9080 struct kvm_segment seg;
9081 unsigned long val;
9082 int i;
9083
9084 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
9085 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
9086 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
9087 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
9088
9089 for (i = 0; i < 8; i++)
9090 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
9091
9092 kvm_get_dr(vcpu, 6, &val);
9093 put_smstate(u32, buf, 0x7fcc, (u32)val);
9094 kvm_get_dr(vcpu, 7, &val);
9095 put_smstate(u32, buf, 0x7fc8, (u32)val);
9096
9097 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9098 put_smstate(u32, buf, 0x7fc4, seg.selector);
9099 put_smstate(u32, buf, 0x7f64, seg.base);
9100 put_smstate(u32, buf, 0x7f60, seg.limit);
9101 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
9102
9103 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9104 put_smstate(u32, buf, 0x7fc0, seg.selector);
9105 put_smstate(u32, buf, 0x7f80, seg.base);
9106 put_smstate(u32, buf, 0x7f7c, seg.limit);
9107 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
9108
9109 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9110 put_smstate(u32, buf, 0x7f74, dt.address);
9111 put_smstate(u32, buf, 0x7f70, dt.size);
9112
9113 static_call(kvm_x86_get_idt)(vcpu, &dt);
9114 put_smstate(u32, buf, 0x7f58, dt.address);
9115 put_smstate(u32, buf, 0x7f54, dt.size);
9116
9117 for (i = 0; i < 6; i++)
9118 enter_smm_save_seg_32(vcpu, buf, i);
9119
9120 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
9121
9122
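	/* SMM revision identifier. */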
9123 put_smstate(u32, buf, 0x7efc, 0x00020000);
9124 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
9125}
9126
9127#ifdef CONFIG_X86_64
9128static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
9129{
9130 struct desc_ptr dt;
9131 struct kvm_segment seg;
9132 unsigned long val;
9133 int i;
9134
9135 for (i = 0; i < 16; i++)
9136 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
9137
9138 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
9139 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
9140
9141 kvm_get_dr(vcpu, 6, &val);
9142 put_smstate(u64, buf, 0x7f68, val);
9143 kvm_get_dr(vcpu, 7, &val);
9144 put_smstate(u64, buf, 0x7f60, val);
9145
9146 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
9147 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
9148 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
9149
9150 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
9151
9152
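	/* SMM revision identifier, indicating the 64-bit save-state layout. */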
9153 put_smstate(u32, buf, 0x7efc, 0x00020064);
9154
9155 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
9156
9157 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9158 put_smstate(u16, buf, 0x7e90, seg.selector);
9159 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
9160 put_smstate(u32, buf, 0x7e94, seg.limit);
9161 put_smstate(u64, buf, 0x7e98, seg.base);
9162
9163 static_call(kvm_x86_get_idt)(vcpu, &dt);
9164 put_smstate(u32, buf, 0x7e84, dt.size);
9165 put_smstate(u64, buf, 0x7e88, dt.address);
9166
9167 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9168 put_smstate(u16, buf, 0x7e70, seg.selector);
9169 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
9170 put_smstate(u32, buf, 0x7e74, seg.limit);
9171 put_smstate(u64, buf, 0x7e78, seg.base);
9172
9173 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9174 put_smstate(u32, buf, 0x7e64, dt.size);
9175 put_smstate(u64, buf, 0x7e68, dt.address);
9176
9177 for (i = 0; i < 6; i++)
9178 enter_smm_save_seg_64(vcpu, buf, i);
9179}
9180#endif
9181
9182static void enter_smm(struct kvm_vcpu *vcpu)
9183{
9184 struct kvm_segment cs, ds;
9185 struct desc_ptr dt;
9186 unsigned long cr0;
9187 char buf[512];
9188
9189 memset(buf, 0, 512);
9190#ifdef CONFIG_X86_64
9191 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
9192 enter_smm_save_state_64(vcpu, buf);
9193 else
9194#endif
9195 enter_smm_save_state_32(vcpu, buf);
9196
	/*
	 * Give vendor code a chance to make ISA-specific changes (e.g. leave
	 * nested guest mode) now that the pre-SMM state has been captured in
	 * 'buf' and before it is written to the guest's SMRAM.
	 */
9202 static_call(kvm_x86_enter_smm)(vcpu, buf);
9203
9204 kvm_smm_changed(vcpu, true);
9205 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
9206
9207 if (static_call(kvm_x86_get_nmi_mask)(vcpu))
9208 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
9209 else
9210 static_call(kvm_x86_set_nmi_mask)(vcpu, true);
9211
9212 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
9213 kvm_rip_write(vcpu, 0x8000);
9214
9215 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
9216 static_call(kvm_x86_set_cr0)(vcpu, cr0);
9217 vcpu->arch.cr0 = cr0;
9218
9219 static_call(kvm_x86_set_cr4)(vcpu, 0);
9220
9221
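	/* The IDT base and limit are cleared on entry to SMM. */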
9222 dt.address = dt.size = 0;
9223 static_call(kvm_x86_set_idt)(vcpu, &dt);
9224
9225 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
9226
9227 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
9228 cs.base = vcpu->arch.smbase;
9229
9230 ds.selector = 0;
9231 ds.base = 0;
9232
9233 cs.limit = ds.limit = 0xffffffff;
9234 cs.type = ds.type = 0x3;
9235 cs.dpl = ds.dpl = 0;
9236 cs.db = ds.db = 0;
9237 cs.s = ds.s = 1;
9238 cs.l = ds.l = 0;
9239 cs.g = ds.g = 1;
9240 cs.avl = ds.avl = 0;
9241 cs.present = ds.present = 1;
9242 cs.unusable = ds.unusable = 0;
9243 cs.padding = ds.padding = 0;
9244
9245 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
9246 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
9247 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
9248 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
9249 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
9250 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
9251
9252#ifdef CONFIG_X86_64
9253 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
9254 static_call(kvm_x86_set_efer)(vcpu, 0);
9255#endif
9256
9257 kvm_update_cpuid_runtime(vcpu);
9258 kvm_mmu_reset_context(vcpu);
9259}
9260
9261static void process_smi(struct kvm_vcpu *vcpu)
9262{
9263 vcpu->arch.smi_pending = true;
9264 kvm_make_request(KVM_REQ_EVENT, vcpu);
9265}
9266
9267void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
9268 unsigned long *vcpu_bitmap)
9269{
9270 cpumask_var_t cpus;
9271
9272 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
9273
9274 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
9275 NULL, vcpu_bitmap, cpus);
9276
9277 free_cpumask_var(cpus);
9278}
9279
9280void kvm_make_scan_ioapic_request(struct kvm *kvm)
9281{
9282 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
9283}
9284
9285void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
9286{
9287 bool activate;
9288
9289 if (!lapic_in_kernel(vcpu))
9290 return;
9291
9292 mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
9293
9294 activate = kvm_apicv_activated(vcpu->kvm);
9295 if (vcpu->arch.apicv_active == activate)
9296 goto out;
9297
9298 vcpu->arch.apicv_active = activate;
9299 kvm_apic_update_apicv(vcpu);
9300 static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
9301
	/*
	 * Interrupts accepted while APICv was still active may be sitting in
	 * the IRR without KVM_REQ_EVENT having been set; request it now so
	 * they are re-evaluated through the normal injection path.
	 */
9308 if (!vcpu->arch.apicv_active)
9309 kvm_make_request(KVM_REQ_EVENT, vcpu);
9310
9311out:
9312 mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
9313}
9314EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
9315
9316void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
9317{
9318 unsigned long old, new;
9319
9320 if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
9321 !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
9322 return;
9323
9324 old = new = kvm->arch.apicv_inhibit_reasons;
9325
9326 if (activate)
9327 __clear_bit(bit, &new);
9328 else
9329 __set_bit(bit, &new);
9330
9331 if (!!old != !!new) {
9332 trace_kvm_apicv_update_request(activate, bit);
9333 kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
9334 kvm->arch.apicv_inhibit_reasons = new;
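		/*
		 * APICv has just become inhibited: zap the APIC access page's
		 * mappings so xAPIC accesses are trapped and emulated again.
		 */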
9335 if (new) {
9336 unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
9337 kvm_zap_gfn_range(kvm, gfn, gfn+1);
9338 }
9339 } else
9340 kvm->arch.apicv_inhibit_reasons = new;
9341}
9342EXPORT_SYMBOL_GPL(__kvm_request_apicv_update);
9343
9344void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
9345{
9346 mutex_lock(&kvm->arch.apicv_update_lock);
9347 __kvm_request_apicv_update(kvm, activate, bit);
9348 mutex_unlock(&kvm->arch.apicv_update_lock);
9349}
9350EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
9351
9352static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
9353{
9354 if (!kvm_apic_present(vcpu))
9355 return;
9356
9357 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
9358
9359 if (irqchip_split(vcpu->kvm))
9360 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
9361 else {
9362 if (vcpu->arch.apicv_active)
9363 static_call(kvm_x86_sync_pir_to_irr)(vcpu);
9364 if (ioapic_in_kernel(vcpu->kvm))
9365 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
9366 }
9367
9368 if (is_guest_mode(vcpu))
9369 vcpu->arch.load_eoi_exitmap_pending = true;
9370 else
9371 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
9372}
9373
9374static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
9375{
9376 u64 eoi_exit_bitmap[4];
9377
9378 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
9379 return;
9380
	if (to_hv_vcpu(vcpu)) {
		bitmap_or((ulong *)eoi_exit_bitmap,
			  vcpu->arch.ioapic_handled_vectors,
			  to_hv_synic(vcpu)->vec_bitmap, 256);
		static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
		return;
	}

	/* No SynIC vectors to merge; don't pass the uninitialized local bitmap. */
	static_call(kvm_x86_load_eoi_exitmap)(
		vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
9387}
9388
9389void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
9390 unsigned long start, unsigned long end)
9391{
9392 unsigned long apic_address;
9393
	/*
	 * If the invalidated range covers the APIC access page, make all
	 * vCPUs reload it, as its host mapping may be about to change.
	 */
9398 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
9399 if (start <= apic_address && apic_address < end)
9400 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
9401}
9402
9403void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
9404{
9405 if (!lapic_in_kernel(vcpu))
9406 return;
9407
9408 if (!kvm_x86_ops.set_apic_access_page_addr)
9409 return;
9410
9411 static_call(kvm_x86_set_apic_access_page_addr)(vcpu);
9412}
9413
9414void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
9415{
9416 smp_send_reschedule(vcpu->cpu);
9417}
9418EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
9419
/*
 * Returns 1 if the vcpu_run() loop should keep running the guest, and 0 or a
 * negative error code if control should return to userspace.
 */
9425static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
9426{
9427 int r;
9428 bool req_int_win =
9429 dm_request_for_irq_injection(vcpu) &&
9430 kvm_cpu_accept_dm_intr(vcpu);
9431 fastpath_t exit_fastpath;
9432
9433 bool req_immediate_exit = false;
9434
9435
9436 if (unlikely(vcpu->kvm->dirty_ring_size &&
9437 kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
9438 vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
9439 trace_kvm_dirty_ring_exit(vcpu);
9440 r = 0;
9441 goto out;
9442 }
9443
9444 if (kvm_request_pending(vcpu)) {
9445 if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
9446 r = -EIO;
9447 goto out;
9448 }
9449 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
9450 if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
9451 r = 0;
9452 goto out;
9453 }
9454 }
9455 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
9456 kvm_mmu_unload(vcpu);
9457 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
9458 __kvm_migrate_timers(vcpu);
9459 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
9460 kvm_gen_update_masterclock(vcpu->kvm);
9461 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
9462 kvm_gen_kvmclock_update(vcpu);
9463 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
9464 r = kvm_guest_time_update(vcpu);
9465 if (unlikely(r))
9466 goto out;
9467 }
9468 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
9469 kvm_mmu_sync_roots(vcpu);
9470 if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
9471 kvm_mmu_load_pgd(vcpu);
9472 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
9473 kvm_vcpu_flush_tlb_all(vcpu);
9474
9475
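			/* A full TLB flush also covers the "current" flush request. */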
9476 kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
9477 }
9478 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
9479 kvm_vcpu_flush_tlb_current(vcpu);
9480 if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
9481 kvm_vcpu_flush_tlb_guest(vcpu);
9482
9483 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
9484 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
9485 r = 0;
9486 goto out;
9487 }
9488 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
9489 if (is_guest_mode(vcpu)) {
9490 kvm_x86_ops.nested_ops->triple_fault(vcpu);
9491 } else {
9492 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
9493 vcpu->mmio_needed = 0;
9494 r = 0;
9495 goto out;
9496 }
9497 }
9498 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
9499
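			/* The faulting page isn't ready yet; synthetically halt the vCPU. */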
9500 vcpu->arch.apf.halted = true;
9501 r = 1;
9502 goto out;
9503 }
9504 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
9505 record_steal_time(vcpu);
9506 if (kvm_check_request(KVM_REQ_SMI, vcpu))
9507 process_smi(vcpu);
9508 if (kvm_check_request(KVM_REQ_NMI, vcpu))
9509 process_nmi(vcpu);
9510 if (kvm_check_request(KVM_REQ_PMU, vcpu))
9511 kvm_pmu_handle_event(vcpu);
9512 if (kvm_check_request(KVM_REQ_PMI, vcpu))
9513 kvm_pmu_deliver_pmi(vcpu);
9514 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
9515 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
9516 if (test_bit(vcpu->arch.pending_ioapic_eoi,
9517 vcpu->arch.ioapic_handled_vectors)) {
9518 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
9519 vcpu->run->eoi.vector =
9520 vcpu->arch.pending_ioapic_eoi;
9521 r = 0;
9522 goto out;
9523 }
9524 }
9525 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
9526 vcpu_scan_ioapic(vcpu);
9527 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
9528 vcpu_load_eoi_exitmap(vcpu);
9529 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
9530 kvm_vcpu_reload_apic_access_page(vcpu);
9531 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
9532 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
9533 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
9534 r = 0;
9535 goto out;
9536 }
9537 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
9538 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
9539 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
9540 r = 0;
9541 goto out;
9542 }
9543 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
9544 struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
9545
9546 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
9547 vcpu->run->hyperv = hv_vcpu->exit;
9548 r = 0;
9549 goto out;
9550 }
9551
		/*
		 * KVM_REQ_HV_STIMER must be handled after KVM_REQ_CLOCK_UPDATE:
		 * Hyper-V SynIC timers depend on an up-to-date guest clock.
		 */
9557 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
9558 kvm_hv_process_stimers(vcpu);
9559 if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
9560 kvm_vcpu_update_apicv(vcpu);
9561 if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
9562 kvm_check_async_pf_completion(vcpu);
9563 if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
9564 static_call(kvm_x86_msr_filter_changed)(vcpu);
9565
9566 if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
9567 static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
9568 }
9569
9570 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
9571 kvm_xen_has_interrupt(vcpu)) {
9572 ++vcpu->stat.req_event;
9573 r = kvm_apic_accept_events(vcpu);
9574 if (r < 0) {
9575 r = 0;
9576 goto out;
9577 }
9578 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
9579 r = 1;
9580 goto out;
9581 }
9582
9583 r = inject_pending_event(vcpu, &req_immediate_exit);
9584 if (r < 0) {
9585 r = 0;
9586 goto out;
9587 }
9588 if (req_int_win)
9589 static_call(kvm_x86_enable_irq_window)(vcpu);
9590
9591 if (kvm_lapic_enabled(vcpu)) {
9592 update_cr8_intercept(vcpu);
9593 kvm_lapic_sync_to_vapic(vcpu);
9594 }
9595 }
9596
9597 r = kvm_mmu_reload(vcpu);
9598 if (unlikely(r)) {
9599 goto cancel_injection;
9600 }
9601
9602 preempt_disable();
9603
9604 static_call(kvm_x86_prepare_guest_switch)(vcpu);
9605
	/*
	 * Disable IRQs before setting vcpu->mode to IN_GUEST_MODE, so that
	 * posted-interrupt IPIs sent from now on are delayed until after
	 * VM-entry and thus result in virtual interrupt delivery.
	 */
9611 local_irq_disable();
9612 vcpu->mode = IN_GUEST_MODE;
9613
9614 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
9615
	/*
	 * The store of IN_GUEST_MODE above must be ordered before the checks
	 * of pending requests and posted interrupts below; srcu_read_unlock()
	 * does not imply a full barrier, hence the explicit one here.
	 */
9628 smp_mb__after_srcu_read_unlock();
9629
	/*
	 * Pick up posted interrupts for which only a plain vCPU kick was
	 * sent, rather than a posted-interrupt notification IPI.
	 */
9634 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
9635 static_call(kvm_x86_sync_pir_to_irr)(vcpu);
9636
9637 if (kvm_vcpu_exit_request(vcpu)) {
9638 vcpu->mode = OUTSIDE_GUEST_MODE;
9639 smp_wmb();
9640 local_irq_enable();
9641 preempt_enable();
9642 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9643 r = 1;
9644 goto cancel_injection;
9645 }
9646
9647 if (req_immediate_exit) {
9648 kvm_make_request(KVM_REQ_EVENT, vcpu);
9649 static_call(kvm_x86_request_immediate_exit)(vcpu);
9650 }
9651
9652 fpregs_assert_state_consistent();
9653 if (test_thread_flag(TIF_NEED_FPU_LOAD))
9654 switch_fpu_return();
9655
9656 if (unlikely(vcpu->arch.switch_db_regs)) {
9657 set_debugreg(0, 7);
9658 set_debugreg(vcpu->arch.eff_db[0], 0);
9659 set_debugreg(vcpu->arch.eff_db[1], 1);
9660 set_debugreg(vcpu->arch.eff_db[2], 2);
9661 set_debugreg(vcpu->arch.eff_db[3], 3);
9662 } else if (unlikely(hw_breakpoint_active())) {
9663 set_debugreg(0, 7);
9664 }
9665
9666 for (;;) {
9667 exit_fastpath = static_call(kvm_x86_run)(vcpu);
9668 if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
9669 break;
9670
9671 if (vcpu->arch.apicv_active)
9672 static_call(kvm_x86_sync_pir_to_irr)(vcpu);
9673
9674 if (unlikely(kvm_vcpu_exit_request(vcpu))) {
9675 exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
9676 break;
9677 }
9678 }
9679
	/*
	 * If the guest had direct access to the debug registers, read back
	 * whatever it wrote before those registers are touched below.
	 */
9686 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
9687 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
9688 static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
9689 kvm_update_dr0123(vcpu);
9690 kvm_update_dr7(vcpu);
9691 }
9692
	/*
	 * Running the guest may have clobbered the hardware debug registers;
	 * restore the host's breakpoints if any are in use.
	 */
9700 if (hw_breakpoint_active())
9701 hw_breakpoint_restore();
9702
9703 vcpu->arch.last_vmentry_cpu = vcpu->cpu;
9704 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
9705
9706 vcpu->mode = OUTSIDE_GUEST_MODE;
9707 smp_wmb();
9708
9709 static_call(kvm_x86_handle_exit_irqoff)(vcpu);
9710
	/*
	 * Briefly open an IRQ window so the interrupt that may have caused
	 * the VM-exit (and any that arrived since) is serviced now, while it
	 * is still attributable to the guest exit.
	 */
9718 kvm_before_interrupt(vcpu);
9719 local_irq_enable();
9720 ++vcpu->stat.exits;
9721 local_irq_disable();
9722 kvm_after_interrupt(vcpu);
9723
	/*
	 * Account guest time only after host IRQs have been serviced, so that
	 * ticks which fired while the guest was running are charged to it.
	 */
9731 vtime_account_guest_exit();
9732
9733 if (lapic_in_kernel(vcpu)) {
9734 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
9735 if (delta != S64_MIN) {
9736 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
9737 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
9738 }
9739 }
9740
9741 local_irq_enable();
9742 preempt_enable();
9743
9744 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9745
9746
9747
9748
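	/* Feed the guest RIP at the point of exit to the kernel profiler. */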
9749 if (unlikely(prof_on == KVM_PROFILING)) {
9750 unsigned long rip = kvm_rip_read(vcpu);
9751 profile_hit(KVM_PROFILING, (void *)rip);
9752 }
9753
9754 if (unlikely(vcpu->arch.tsc_always_catchup))
9755 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
9756
9757 if (vcpu->arch.apic_attention)
9758 kvm_lapic_sync_from_vapic(vcpu);
9759
9760 r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
9761 return r;
9762
9763cancel_injection:
9764 if (req_immediate_exit)
9765 kvm_make_request(KVM_REQ_EVENT, vcpu);
9766 static_call(kvm_x86_cancel_injection)(vcpu);
9767 if (unlikely(vcpu->arch.apic_attention))
9768 kvm_lapic_sync_from_vapic(vcpu);
9769out:
9770 return r;
9771}
9772
9773static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
9774{
9775 if (!kvm_arch_vcpu_runnable(vcpu) &&
9776 (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
9777 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9778 kvm_vcpu_block(vcpu);
9779 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
9780
9781 if (kvm_x86_ops.post_block)
9782 static_call(kvm_x86_post_block)(vcpu);
9783
9784 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
9785 return 1;
9786 }
9787
9788 if (kvm_apic_accept_events(vcpu) < 0)
9789 return 0;
9790 switch(vcpu->arch.mp_state) {
9791 case KVM_MP_STATE_HALTED:
9792 case KVM_MP_STATE_AP_RESET_HOLD:
9793 vcpu->arch.pv.pv_unhalted = false;
9794 vcpu->arch.mp_state =
9795 KVM_MP_STATE_RUNNABLE;
9796 fallthrough;
9797 case KVM_MP_STATE_RUNNABLE:
9798 vcpu->arch.apf.halted = false;
9799 break;
9800 case KVM_MP_STATE_INIT_RECEIVED:
9801 break;
9802 default:
9803 return -EINTR;
9804 }
9805 return 1;
9806}
9807
9808static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
9809{
9810 if (is_guest_mode(vcpu))
9811 kvm_check_nested_events(vcpu);
9812
9813 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
9814 !vcpu->arch.apf.halted);
9815}
9816
9817static int vcpu_run(struct kvm_vcpu *vcpu)
9818{
9819 int r;
9820 struct kvm *kvm = vcpu->kvm;
9821
9822 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
9823 vcpu->arch.l1tf_flush_l1d = true;
9824
9825 for (;;) {
9826 if (kvm_vcpu_running(vcpu)) {
9827 r = vcpu_enter_guest(vcpu);
9828 } else {
9829 r = vcpu_block(kvm, vcpu);
9830 }
9831
9832 if (r <= 0)
9833 break;
9834
9835 kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
9836 if (kvm_cpu_has_pending_timer(vcpu))
9837 kvm_inject_pending_timer_irqs(vcpu);
9838
9839 if (dm_request_for_irq_injection(vcpu) &&
9840 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
9841 r = 0;
9842 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
9843 ++vcpu->stat.request_irq_exits;
9844 break;
9845 }
9846
9847 if (__xfer_to_guest_mode_work_pending()) {
9848 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9849 r = xfer_to_guest_mode_handle_work(vcpu);
9850 if (r)
9851 return r;
9852 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
9853 }
9854 }
9855
9856 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9857
9858 return r;
9859}
9860
9861static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
9862{
9863 int r;
9864
9865 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9866 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
9867 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
9868 return r;
9869}
9870
9871static int complete_emulated_pio(struct kvm_vcpu *vcpu)
9872{
9873 BUG_ON(!vcpu->arch.pio.count);
9874
9875 return complete_emulated_io(vcpu);
9876}
9877
9878
/*
 * Completion callback for MMIO exits.  An emulated MMIO access that crosses a
 * page boundary is split into fragments, and each fragment is presented to
 * userspace as a KVM_EXIT_MMIO of at most 8 bytes at a time.  For reads, the
 * data supplied by userspace is copied back into the fragment before the next
 * piece is issued; once all fragments are done, instruction emulation resumes.
 */
9896static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
9897{
9898 struct kvm_run *run = vcpu->run;
9899 struct kvm_mmio_fragment *frag;
9900 unsigned len;
9901
9902 BUG_ON(!vcpu->mmio_needed);
9903
9904
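	/* Complete the piece that userspace just handled. */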
9905 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
9906 len = min(8u, frag->len);
9907 if (!vcpu->mmio_is_write)
9908 memcpy(frag->data, run->mmio.data, len);
9909
9910 if (frag->len <= 8) {
9911
9912 frag++;
9913 vcpu->mmio_cur_fragment++;
9914 } else {
9915
9916 frag->data += len;
9917 frag->gpa += len;
9918 frag->len -= len;
9919 }
9920
9921 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
9922 vcpu->mmio_needed = 0;
9923
9924
9925 if (vcpu->mmio_is_write)
9926 return 1;
9927 vcpu->mmio_read_completed = 1;
9928 return complete_emulated_io(vcpu);
9929 }
9930
9931 run->exit_reason = KVM_EXIT_MMIO;
9932 run->mmio.phys_addr = frag->gpa;
9933 if (vcpu->mmio_is_write)
9934 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
9935 run->mmio.len = min(8u, frag->len);
9936 run->mmio.is_write = vcpu->mmio_is_write;
9937 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
9938 return 0;
9939}
9940
9941static void kvm_save_current_fpu(struct fpu *fpu)
9942{
	/*
	 * If the task's FPU state is not currently loaded in the CPU's
	 * registers, copy the saved image from 'current'; otherwise save the
	 * live registers directly into the target buffer.
	 */
9947 if (test_thread_flag(TIF_NEED_FPU_LOAD))
		memcpy(&fpu->state, &current->thread.fpu.state,
		       fpu_kernel_xstate_size);
9950 else
9951 save_fpregs_to_fpstate(fpu);
9952}
9953
9954
9955static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
9956{
9957 fpregs_lock();
9958
9959 kvm_save_current_fpu(vcpu->arch.user_fpu);
9960
	/*
	 * Guests with protected register state (e.g. SEV-ES) have no
	 * guest_fpu visible to KVM, so there is nothing to load for them.
	 */
9965 if (vcpu->arch.guest_fpu)
		/* PKRU is switched separately around VM-entry/VM-exit. */
9967 __restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state,
9968 ~XFEATURE_MASK_PKRU);
9969
9970 fpregs_mark_activate();
9971 fpregs_unlock();
9972
9973 trace_kvm_fpu(1);
9974}
9975
9976
9977static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
9978{
9979 fpregs_lock();
9980
	/*
	 * Guests with protected register state have no guest_fpu for KVM
	 * to save.
	 */
9985 if (vcpu->arch.guest_fpu)
9986 kvm_save_current_fpu(vcpu->arch.guest_fpu);
9987
9988 restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state);
9989
9990 fpregs_mark_activate();
9991 fpregs_unlock();
9992
9993 ++vcpu->stat.fpu_reload;
9994 trace_kvm_fpu(0);
9995}
9996
9997int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
9998{
9999 struct kvm_run *kvm_run = vcpu->run;
10000 int r;
10001
10002 vcpu_load(vcpu);
10003 kvm_sigset_activate(vcpu);
10004 kvm_run->flags = 0;
10005 kvm_load_guest_fpu(vcpu);
10006
10007 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
10008 if (kvm_run->immediate_exit) {
10009 r = -EINTR;
10010 goto out;
10011 }
10012 kvm_vcpu_block(vcpu);
10013 if (kvm_apic_accept_events(vcpu) < 0) {
10014 r = 0;
10015 goto out;
10016 }
10017 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
10018 r = -EAGAIN;
10019 if (signal_pending(current)) {
10020 r = -EINTR;
10021 kvm_run->exit_reason = KVM_EXIT_INTR;
10022 ++vcpu->stat.signal_exits;
10023 }
10024 goto out;
10025 }
10026
10027 if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
10028 (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
10029 r = -EINVAL;
10030 goto out;
10031 }
10032
10033 if (kvm_run->kvm_dirty_regs) {
10034 r = sync_regs(vcpu);
10035 if (r != 0)
10036 goto out;
10037 }
10038
10039
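	/* With a userspace local APIC, re-sync the TPR supplied in kvm_run. */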
10040 if (!lapic_in_kernel(vcpu)) {
10041 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
10042 r = -EINVAL;
10043 goto out;
10044 }
10045 }
10046
10047 if (unlikely(vcpu->arch.complete_userspace_io)) {
10048 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
10049 vcpu->arch.complete_userspace_io = NULL;
10050 r = cui(vcpu);
10051 if (r <= 0)
10052 goto out;
10053 } else
10054 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
10055
10056 if (kvm_run->immediate_exit)
10057 r = -EINTR;
10058 else
10059 r = vcpu_run(vcpu);
10060
10061out:
10062 kvm_put_guest_fpu(vcpu);
10063 if (kvm_run->kvm_valid_regs)
10064 store_regs(vcpu);
10065 post_kvm_run_save(vcpu);
10066 kvm_sigset_deactivate(vcpu);
10067
10068 vcpu_put(vcpu);
10069 return r;
10070}
10071
10072static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10073{
10074 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * Userspace is reading registers in the middle of instruction
		 * emulation; flush the emulator's register cache back to the
		 * vCPU so the values returned here are current.
		 */
10082 emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
10083 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10084 }
10085 regs->rax = kvm_rax_read(vcpu);
10086 regs->rbx = kvm_rbx_read(vcpu);
10087 regs->rcx = kvm_rcx_read(vcpu);
10088 regs->rdx = kvm_rdx_read(vcpu);
10089 regs->rsi = kvm_rsi_read(vcpu);
10090 regs->rdi = kvm_rdi_read(vcpu);
10091 regs->rsp = kvm_rsp_read(vcpu);
10092 regs->rbp = kvm_rbp_read(vcpu);
10093#ifdef CONFIG_X86_64
10094 regs->r8 = kvm_r8_read(vcpu);
10095 regs->r9 = kvm_r9_read(vcpu);
10096 regs->r10 = kvm_r10_read(vcpu);
10097 regs->r11 = kvm_r11_read(vcpu);
10098 regs->r12 = kvm_r12_read(vcpu);
10099 regs->r13 = kvm_r13_read(vcpu);
10100 regs->r14 = kvm_r14_read(vcpu);
10101 regs->r15 = kvm_r15_read(vcpu);
10102#endif
10103
10104 regs->rip = kvm_rip_read(vcpu);
10105 regs->rflags = kvm_get_rflags(vcpu);
10106}
10107
10108int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10109{
10110 vcpu_load(vcpu);
10111 __get_regs(vcpu, regs);
10112 vcpu_put(vcpu);
10113 return 0;
10114}
10115
10116static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10117{
10118 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
10119 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10120
10121 kvm_rax_write(vcpu, regs->rax);
10122 kvm_rbx_write(vcpu, regs->rbx);
10123 kvm_rcx_write(vcpu, regs->rcx);
10124 kvm_rdx_write(vcpu, regs->rdx);
10125 kvm_rsi_write(vcpu, regs->rsi);
10126 kvm_rdi_write(vcpu, regs->rdi);
10127 kvm_rsp_write(vcpu, regs->rsp);
10128 kvm_rbp_write(vcpu, regs->rbp);
10129#ifdef CONFIG_X86_64
10130 kvm_r8_write(vcpu, regs->r8);
10131 kvm_r9_write(vcpu, regs->r9);
10132 kvm_r10_write(vcpu, regs->r10);
10133 kvm_r11_write(vcpu, regs->r11);
10134 kvm_r12_write(vcpu, regs->r12);
10135 kvm_r13_write(vcpu, regs->r13);
10136 kvm_r14_write(vcpu, regs->r14);
10137 kvm_r15_write(vcpu, regs->r15);
10138#endif
10139
10140 kvm_rip_write(vcpu, regs->rip);
10141 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
10142
10143 vcpu->arch.exception.pending = false;
10144
10145 kvm_make_request(KVM_REQ_EVENT, vcpu);
10146}
10147
10148int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10149{
10150 vcpu_load(vcpu);
10151 __set_regs(vcpu, regs);
10152 vcpu_put(vcpu);
10153 return 0;
10154}
10155
10156void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
10157{
10158 struct kvm_segment cs;
10159
10160 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
10161 *db = cs.db;
10162 *l = cs.l;
10163}
10164EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
10165
10166static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10167{
10168 struct desc_ptr dt;
10169
10170 if (vcpu->arch.guest_state_protected)
10171 goto skip_protected_regs;
10172
10173 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
10174 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
10175 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
10176 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
10177 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
10178 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
10179
10180 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
10181 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
10182
10183 static_call(kvm_x86_get_idt)(vcpu, &dt);
10184 sregs->idt.limit = dt.size;
10185 sregs->idt.base = dt.address;
10186 static_call(kvm_x86_get_gdt)(vcpu, &dt);
10187 sregs->gdt.limit = dt.size;
10188 sregs->gdt.base = dt.address;
10189
10190 sregs->cr2 = vcpu->arch.cr2;
10191 sregs->cr3 = kvm_read_cr3(vcpu);
10192
10193skip_protected_regs:
10194 sregs->cr0 = kvm_read_cr0(vcpu);
10195 sregs->cr4 = kvm_read_cr4(vcpu);
10196 sregs->cr8 = kvm_get_cr8(vcpu);
10197 sregs->efer = vcpu->arch.efer;
10198 sregs->apic_base = kvm_get_apic_base(vcpu);
10199}
10200
10201static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10202{
10203 __get_sregs_common(vcpu, sregs);
10204
10205 if (vcpu->arch.guest_state_protected)
10206 return;
10207
10208 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
10209 set_bit(vcpu->arch.interrupt.nr,
10210 (unsigned long *)sregs->interrupt_bitmap);
10211}
10212
10213static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
10214{
10215 int i;
10216
10217 __get_sregs_common(vcpu, (struct kvm_sregs *)sregs2);
10218
10219 if (vcpu->arch.guest_state_protected)
10220 return;
10221
10222 if (is_pae_paging(vcpu)) {
10223 for (i = 0 ; i < 4 ; i++)
10224 sregs2->pdptrs[i] = kvm_pdptr_read(vcpu, i);
10225 sregs2->flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
10226 }
10227}
10228
10229int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
10230 struct kvm_sregs *sregs)
10231{
10232 vcpu_load(vcpu);
10233 __get_sregs(vcpu, sregs);
10234 vcpu_put(vcpu);
10235 return 0;
10236}
10237
10238int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
10239 struct kvm_mp_state *mp_state)
10240{
10241 int r;
10242
10243 vcpu_load(vcpu);
10244 if (kvm_mpx_supported())
10245 kvm_load_guest_fpu(vcpu);
10246
10247 r = kvm_apic_accept_events(vcpu);
10248 if (r < 0)
10249 goto out;
10250 r = 0;
10251
10252 if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
10253 vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
10254 vcpu->arch.pv.pv_unhalted)
10255 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
10256 else
10257 mp_state->mp_state = vcpu->arch.mp_state;
10258
10259out:
10260 if (kvm_mpx_supported())
10261 kvm_put_guest_fpu(vcpu);
10262 vcpu_put(vcpu);
10263 return r;
10264}
10265
10266int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
10267 struct kvm_mp_state *mp_state)
10268{
10269 int ret = -EINVAL;
10270
10271 vcpu_load(vcpu);
10272
10273 if (!lapic_in_kernel(vcpu) &&
10274 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
10275 goto out;
10276
	/*
	 * Reject INIT/SIPI states while an INIT is latched or an SMI is
	 * pending; a latched INIT should be reported via KVM_SET_VCPU_EVENTS
	 * instead.
	 */
10282 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
10283 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
10284 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
10285 goto out;
10286
10287 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
10288 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
10289 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
10290 } else
10291 vcpu->arch.mp_state = mp_state->mp_state;
10292 kvm_make_request(KVM_REQ_EVENT, vcpu);
10293
10294 ret = 0;
10295out:
10296 vcpu_put(vcpu);
10297 return ret;
10298}
10299
10300int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
10301 int reason, bool has_error_code, u32 error_code)
10302{
10303 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
10304 int ret;
10305
10306 init_emulate_ctxt(vcpu);
10307
10308 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
10309 has_error_code, error_code);
10310 if (ret) {
10311 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
10312 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
10313 vcpu->run->internal.ndata = 0;
10314 return 0;
10315 }
10316
10317 kvm_rip_write(vcpu, ctxt->eip);
10318 kvm_set_rflags(vcpu, ctxt->eflags);
10319 return 1;
10320}
10321EXPORT_SYMBOL_GPL(kvm_task_switch);
10322
10323static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10324{
10325 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
		/*
		 * With EFER.LME and CR0.PG both set the vCPU is in 64-bit
		 * mode, so CR4.PAE and EFER.LMA must also be set and CR3 must
		 * be a legal physical address.
		 */
10331 if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
10332 return false;
10333 if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
10334 return false;
10335 } else {
		/*
		 * Not in 64-bit mode: EFER.LMA must be clear and the code
		 * segment cannot be a 64-bit segment.
		 */
10340 if (sregs->efer & EFER_LMA || sregs->cs.l)
10341 return false;
10342 }
10343
10344 return kvm_is_valid_cr4(vcpu, sregs->cr4);
10345}
10346
10347static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
10348 int *mmu_reset_needed, bool update_pdptrs)
10349{
10350 struct msr_data apic_base_msr;
10351 int idx;
10352 struct desc_ptr dt;
10353
10354 if (!kvm_is_valid_sregs(vcpu, sregs))
10355 return -EINVAL;
10356
10357 apic_base_msr.data = sregs->apic_base;
10358 apic_base_msr.host_initiated = true;
10359 if (kvm_set_apic_base(vcpu, &apic_base_msr))
10360 return -EINVAL;
10361
10362 if (vcpu->arch.guest_state_protected)
10363 return 0;
10364
10365 dt.size = sregs->idt.limit;
10366 dt.address = sregs->idt.base;
10367 static_call(kvm_x86_set_idt)(vcpu, &dt);
10368 dt.size = sregs->gdt.limit;
10369 dt.address = sregs->gdt.base;
10370 static_call(kvm_x86_set_gdt)(vcpu, &dt);
10371
10372 vcpu->arch.cr2 = sregs->cr2;
10373 *mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
10374 vcpu->arch.cr3 = sregs->cr3;
10375 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
10376
10377 kvm_set_cr8(vcpu, sregs->cr8);
10378
10379 *mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
10380 static_call(kvm_x86_set_efer)(vcpu, sregs->efer);
10381
10382 *mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
10383 static_call(kvm_x86_set_cr0)(vcpu, sregs->cr0);
10384 vcpu->arch.cr0 = sregs->cr0;
10385
10386 *mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
10387 static_call(kvm_x86_set_cr4)(vcpu, sregs->cr4);
10388
10389 if (update_pdptrs) {
10390 idx = srcu_read_lock(&vcpu->kvm->srcu);
10391 if (is_pae_paging(vcpu)) {
10392 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
10393 *mmu_reset_needed = 1;
10394 }
10395 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10396 }
10397
10398 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
10399 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
10400 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
10401 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
10402 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
10403 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
10404
10405 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
10406 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
10407
10408 update_cr8_intercept(vcpu);
10409
10410
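	/* Older userspace won't unhalt the vcpu on reset, so do it here. */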
10411 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
10412 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
10413 !is_protmode(vcpu))
10414 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10415
10416 return 0;
10417}
10418
10419static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10420{
10421 int pending_vec, max_bits;
10422 int mmu_reset_needed = 0;
10423 int ret = __set_sregs_common(vcpu, sregs, &mmu_reset_needed, true);
10424
10425 if (ret)
10426 return ret;
10427
10428 if (mmu_reset_needed)
10429 kvm_mmu_reset_context(vcpu);
10430
10431 max_bits = KVM_NR_INTERRUPTS;
10432 pending_vec = find_first_bit(
10433 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
10434
10435 if (pending_vec < max_bits) {
10436 kvm_queue_interrupt(vcpu, pending_vec, false);
10437 pr_debug("Set back pending irq %d\n", pending_vec);
10438 kvm_make_request(KVM_REQ_EVENT, vcpu);
10439 }
10440 return 0;
10441}
10442
10443static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
10444{
10445 int mmu_reset_needed = 0;
10446 bool valid_pdptrs = sregs2->flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
10447 bool pae = (sregs2->cr0 & X86_CR0_PG) && (sregs2->cr4 & X86_CR4_PAE) &&
10448 !(sregs2->efer & EFER_LMA);
10449 int i, ret;
10450
10451 if (sregs2->flags & ~KVM_SREGS2_FLAGS_PDPTRS_VALID)
10452 return -EINVAL;
10453
10454 if (valid_pdptrs && (!pae || vcpu->arch.guest_state_protected))
10455 return -EINVAL;
10456
10457 ret = __set_sregs_common(vcpu, (struct kvm_sregs *)sregs2,
10458 &mmu_reset_needed, !valid_pdptrs);
10459 if (ret)
10460 return ret;
10461
10462 if (valid_pdptrs) {
10463 for (i = 0; i < 4 ; i++)
10464 kvm_pdptr_write(vcpu, i, sregs2->pdptrs[i]);
10465
10466 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
10467 mmu_reset_needed = 1;
10468 vcpu->arch.pdptrs_from_userspace = true;
10469 }
10470 if (mmu_reset_needed)
10471 kvm_mmu_reset_context(vcpu);
10472 return 0;
10473}
10474
10475int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
10476 struct kvm_sregs *sregs)
10477{
10478 int ret;
10479
10480 vcpu_load(vcpu);
10481 ret = __set_sregs(vcpu, sregs);
10482 vcpu_put(vcpu);
10483 return ret;
10484}
10485
10486int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
10487 struct kvm_guest_debug *dbg)
10488{
10489 unsigned long rflags;
10490 int i, r;
10491
10492 if (vcpu->arch.guest_state_protected)
10493 return -EINVAL;
10494
10495 vcpu_load(vcpu);
10496
10497 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
10498 r = -EBUSY;
10499 if (vcpu->arch.exception.pending)
10500 goto out;
10501 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
10502 kvm_queue_exception(vcpu, DB_VECTOR);
10503 else
10504 kvm_queue_exception(vcpu, BP_VECTOR);
10505 }
10506
10507
10508
10509
10510
10511 rflags = kvm_get_rflags(vcpu);
10512
10513 vcpu->guest_debug = dbg->control;
10514 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
10515 vcpu->guest_debug = 0;
10516
10517 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
10518 for (i = 0; i < KVM_NR_DB_REGS; ++i)
10519 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
10520 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
10521 } else {
10522 for (i = 0; i < KVM_NR_DB_REGS; i++)
10523 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
10524 }
10525 kvm_update_dr7(vcpu);
10526
10527 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10528 vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
10529
10530
10531
10532
10533
10534 kvm_set_rflags(vcpu, rflags);
10535
10536 static_call(kvm_x86_update_exception_bitmap)(vcpu);
10537
10538 r = 0;
10539
10540out:
10541 vcpu_put(vcpu);
10542 return r;
10543}
10544
/*
 * Translate a guest virtual address to a guest physical address.
 */
10548int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
10549 struct kvm_translation *tr)
10550{
10551 unsigned long vaddr = tr->linear_address;
10552 gpa_t gpa;
10553 int idx;
10554
10555 vcpu_load(vcpu);
10556
10557 idx = srcu_read_lock(&vcpu->kvm->srcu);
10558 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
10559 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10560 tr->physical_address = gpa;
10561 tr->valid = gpa != UNMAPPED_GVA;
10562 tr->writeable = 1;
10563 tr->usermode = 0;
10564
10565 vcpu_put(vcpu);
10566 return 0;
10567}
10568
10569int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
10570{
10571 struct fxregs_state *fxsave;
10572
10573 if (!vcpu->arch.guest_fpu)
10574 return 0;
10575
10576 vcpu_load(vcpu);
10577
10578 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
10579 memcpy(fpu->fpr, fxsave->st_space, 128);
10580 fpu->fcw = fxsave->cwd;
10581 fpu->fsw = fxsave->swd;
10582 fpu->ftwx = fxsave->twd;
10583 fpu->last_opcode = fxsave->fop;
10584 fpu->last_ip = fxsave->rip;
10585 fpu->last_dp = fxsave->rdp;
10586 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
10587
10588 vcpu_put(vcpu);
10589 return 0;
10590}
10591
10592int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
10593{
10594 struct fxregs_state *fxsave;
10595
10596 if (!vcpu->arch.guest_fpu)
10597 return 0;
10598
10599 vcpu_load(vcpu);
10600
10601 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
10602
10603 memcpy(fxsave->st_space, fpu->fpr, 128);
10604 fxsave->cwd = fpu->fcw;
10605 fxsave->swd = fpu->fsw;
10606 fxsave->twd = fpu->ftwx;
10607 fxsave->fop = fpu->last_opcode;
10608 fxsave->rip = fpu->last_ip;
10609 fxsave->rdp = fpu->last_dp;
10610 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
10611
10612 vcpu_put(vcpu);
10613 return 0;
10614}
10615
10616static void store_regs(struct kvm_vcpu *vcpu)
10617{
10618 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
10619
10620 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
10621 __get_regs(vcpu, &vcpu->run->s.regs.regs);
10622
10623 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
10624 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
10625
10626 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
10627 kvm_vcpu_ioctl_x86_get_vcpu_events(
10628 vcpu, &vcpu->run->s.regs.events);
10629}
10630
10631static int sync_regs(struct kvm_vcpu *vcpu)
10632{
10633 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
10634 __set_regs(vcpu, &vcpu->run->s.regs.regs);
10635 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
10636 }
10637 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
10638 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
10639 return -EINVAL;
10640 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
10641 }
10642 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
10643 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
10644 vcpu, &vcpu->run->s.regs.events))
10645 return -EINVAL;
10646 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
10647 }
10648
10649 return 0;
10650}
10651
10652static void fx_init(struct kvm_vcpu *vcpu)
10653{
10654 if (!vcpu->arch.guest_fpu)
10655 return;
10656
10657 fpstate_init(&vcpu->arch.guest_fpu->state);
10658 if (boot_cpu_has(X86_FEATURE_XSAVES))
10659 vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
10660 host_xcr0 | XSTATE_COMPACTION_ENABLED;
10661
10662
10663
10664
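	/* Start the guest with a minimal, always-valid XCR0. */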
10665 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
10666
10667 vcpu->arch.cr0 |= X86_CR0_ET;
10668}
10669
10670void kvm_free_guest_fpu(struct kvm_vcpu *vcpu)
10671{
10672 if (vcpu->arch.guest_fpu) {
10673 kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
10674 vcpu->arch.guest_fpu = NULL;
10675 }
10676}
10677EXPORT_SYMBOL_GPL(kvm_free_guest_fpu);
10678
10679int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
10680{
10681 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
10682 pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
10683 "guest TSC will not be reliable\n");
10684
10685 return 0;
10686}
10687
10688int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
10689{
10690 struct page *page;
10691 int r;
10692
10693 vcpu->arch.last_vmentry_cpu = -1;
10694 vcpu->arch.regs_avail = ~0;
10695 vcpu->arch.regs_dirty = ~0;
10696
10697 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
10698 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10699 else
10700 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
10701
10702 r = kvm_mmu_create(vcpu);
10703 if (r < 0)
10704 return r;
10705
10706 if (irqchip_in_kernel(vcpu->kvm)) {
10707 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
10708 if (r < 0)
10709 goto fail_mmu_destroy;
10710 if (kvm_apicv_activated(vcpu->kvm))
10711 vcpu->arch.apicv_active = true;
10712 } else
10713 static_branch_inc(&kvm_has_noapic_vcpu);
10714
10715 r = -ENOMEM;
10716
10717 page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
10718 if (!page)
10719 goto fail_free_lapic;
10720 vcpu->arch.pio_data = page_address(page);
10721
10722 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
10723 GFP_KERNEL_ACCOUNT);
10724 if (!vcpu->arch.mce_banks)
10725 goto fail_free_pio_data;
10726 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
10727
10728 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
10729 GFP_KERNEL_ACCOUNT))
10730 goto fail_free_mce_banks;
10731
10732 if (!alloc_emulate_ctxt(vcpu))
10733 goto free_wbinvd_dirty_mask;
10734
10735 vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
10736 GFP_KERNEL_ACCOUNT);
10737 if (!vcpu->arch.user_fpu) {
10738 pr_err("kvm: failed to allocate userspace's fpu\n");
10739 goto free_emulate_ctxt;
10740 }
10741
10742 vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
10743 GFP_KERNEL_ACCOUNT);
10744 if (!vcpu->arch.guest_fpu) {
10745 pr_err("kvm: failed to allocate vcpu's fpu\n");
10746 goto free_user_fpu;
10747 }
10748 fx_init(vcpu);
10749
10750 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
10751 vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
10752
10753 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
10754
10755 kvm_async_pf_hash_reset(vcpu);
10756 kvm_pmu_init(vcpu);
10757
10758 vcpu->arch.pending_external_vector = -1;
10759 vcpu->arch.preempted_in_kernel = false;
10760
10761#if IS_ENABLED(CONFIG_HYPERV)
10762 vcpu->arch.hv_root_tdp = INVALID_PAGE;
10763#endif
10764
10765 r = static_call(kvm_x86_vcpu_create)(vcpu);
10766 if (r)
10767 goto free_guest_fpu;
10768
10769 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
10770 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
10771 kvm_vcpu_mtrr_init(vcpu);
10772 vcpu_load(vcpu);
10773 kvm_set_tsc_khz(vcpu, max_tsc_khz);
10774 kvm_vcpu_reset(vcpu, false);
10775 kvm_init_mmu(vcpu);
10776 vcpu_put(vcpu);
10777 return 0;
10778
10779free_guest_fpu:
10780 kvm_free_guest_fpu(vcpu);
10781free_user_fpu:
10782 kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
10783free_emulate_ctxt:
10784 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
10785free_wbinvd_dirty_mask:
10786 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
10787fail_free_mce_banks:
10788 kfree(vcpu->arch.mce_banks);
10789fail_free_pio_data:
10790 free_page((unsigned long)vcpu->arch.pio_data);
10791fail_free_lapic:
10792 kvm_free_lapic(vcpu);
10793fail_mmu_destroy:
10794 kvm_mmu_destroy(vcpu);
10795 return r;
10796}
10797
10798void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
10799{
10800 struct kvm *kvm = vcpu->kvm;
10801
10802 if (mutex_lock_killable(&vcpu->mutex))
10803 return;
10804 vcpu_load(vcpu);
10805 kvm_synchronize_tsc(vcpu, 0);
10806 vcpu_put(vcpu);
10807
10808
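	/* Poll control is enabled by default. */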
10809 vcpu->arch.msr_kvm_poll_control = 1;
10810
10811 mutex_unlock(&vcpu->mutex);
10812
10813 if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
10814 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
10815 KVMCLOCK_SYNC_PERIOD);
10816}
10817
10818void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
10819{
10820 struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
10821 int idx;
10822
10823 kvm_release_pfn(cache->pfn, cache->dirty, cache);
10824
10825 kvmclock_reset(vcpu);
10826
10827 static_call(kvm_x86_vcpu_free)(vcpu);
10828
10829 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
10830 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
10831 kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
10832 kvm_free_guest_fpu(vcpu);
10833
10834 kvm_hv_vcpu_uninit(vcpu);
10835 kvm_pmu_destroy(vcpu);
10836 kfree(vcpu->arch.mce_banks);
10837 kvm_free_lapic(vcpu);
10838 idx = srcu_read_lock(&vcpu->kvm->srcu);
10839 kvm_mmu_destroy(vcpu);
10840 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10841 free_page((unsigned long)vcpu->arch.pio_data);
10842 kvfree(vcpu->arch.cpuid_entries);
10843 if (!lapic_in_kernel(vcpu))
10844 static_branch_dec(&kvm_has_noapic_vcpu);
10845}
10846
10847void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
10848{
10849 unsigned long old_cr0 = kvm_read_cr0(vcpu);
10850 unsigned long new_cr0;
10851 u32 eax, dummy;
10852
10853 kvm_lapic_reset(vcpu, init_event);
10854
10855 vcpu->arch.hflags = 0;
10856
10857 vcpu->arch.smi_pending = 0;
10858 vcpu->arch.smi_count = 0;
10859 atomic_set(&vcpu->arch.nmi_queued, 0);
10860 vcpu->arch.nmi_pending = 0;
10861 vcpu->arch.nmi_injected = false;
10862 kvm_clear_interrupt_queue(vcpu);
10863 kvm_clear_exception_queue(vcpu);
10864
10865 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
10866 kvm_update_dr0123(vcpu);
10867 vcpu->arch.dr6 = DR6_ACTIVE_LOW;
10868 vcpu->arch.dr7 = DR7_FIXED_1;
10869 kvm_update_dr7(vcpu);
10870
10871 vcpu->arch.cr2 = 0;
10872
10873 kvm_make_request(KVM_REQ_EVENT, vcpu);
10874 vcpu->arch.apf.msr_en_val = 0;
10875 vcpu->arch.apf.msr_int_val = 0;
10876 vcpu->arch.st.msr_val = 0;
10877
10878 kvmclock_reset(vcpu);
10879
10880 kvm_clear_async_pf_completion_queue(vcpu);
10881 kvm_async_pf_hash_reset(vcpu);
10882 vcpu->arch.apf.halted = false;
10883
10884 if (vcpu->arch.guest_fpu && kvm_mpx_supported()) {
10885 void *mpx_state_buffer;
10886
		/*
		 * On INIT the guest FPU may be loaded in hardware; put it so
		 * the in-memory XSAVE image can be modified, then reload it
		 * once the MPX state has been cleared.
		 */
10891 if (init_event)
10892 kvm_put_guest_fpu(vcpu);
10893 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
10894 XFEATURE_BNDREGS);
10895 if (mpx_state_buffer)
10896 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
10897 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
10898 XFEATURE_BNDCSR);
10899 if (mpx_state_buffer)
10900 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
10901 if (init_event)
10902 kvm_load_guest_fpu(vcpu);
10903 }
10904
10905 if (!init_event) {
10906 kvm_pmu_reset(vcpu);
10907 vcpu->arch.smbase = 0x30000;
10908
10909 vcpu->arch.msr_misc_features_enables = 0;
10910
10911 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
10912 }
10913
10914 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
10915 vcpu->arch.regs_avail = ~0;
10916 vcpu->arch.regs_dirty = ~0;
10917
	/*
	 * At reset, RDX holds the CPU's family/model/stepping.  Use
	 * CPUID.0x1:EAX if userspace has provided CPUID entries, otherwise
	 * fall back to KVM's default of 0x600.
	 */
10925 eax = 1;
10926 if (!kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true))
10927 eax = 0x600;
10928 kvm_rdx_write(vcpu, eax);
10929
10930 vcpu->arch.ia32_xss = 0;
10931
10932 static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
10933
10934 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
10935 kvm_rip_write(vcpu, 0xfff0);
10936
10937 vcpu->arch.cr3 = 0;
10938 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
10939
 /*
  * CR0.CD and CR0.NW are set on RESET but preserved across INIT, and
  * CR0.ET is architecturally always 1; every other CR0 bit is cleared.
  */
10945 new_cr0 = X86_CR0_ET;
10946 if (init_event)
10947 new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD));
10948 else
10949 new_cr0 |= X86_CR0_NW | X86_CR0_CD;
10950
10951 static_call(kvm_x86_set_cr0)(vcpu, new_cr0);
10952 static_call(kvm_x86_set_cr4)(vcpu, 0);
10953 static_call(kvm_x86_set_efer)(vcpu, 0);
10954 static_call(kvm_x86_update_exception_bitmap)(vcpu);
10955
 /*
  * Reset the MMU context if paging was enabled prior to INIT (which is
  * implied if CR0.PG=1 as CR0 will be '0' prior to RESET).  Checking only
  * CR0.PG is sufficient because the CR0/CR4/EFER writes above have already
  * zeroed every other paging-related input to the MMU.
  */
10964 if (old_cr0 & X86_CR0_PG)
10965 kvm_mmu_reset_context(vcpu);
10966
 /*
  * Intel's SDM states that all TLB entries are flushed on INIT.  AMD's APM
  * states that the TLBs are unchanged by INIT, but it also states that the
  * TLBs are flushed on "External initialization of the processor."  Flush
  * the guest TLB on INIT regardless of vendor; flushing is architecturally
  * safe and matches what guests expect.
  */
10976 if (init_event)
10977 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
10978}
10979EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
10980
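/*
 * Deliver a SIPI: vector V points CS at selector V << 8 with base V << 12
 * and zeroes RIP, so the AP starts fetching at physical address V * 0x1000
 * in real mode.
 */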
10981void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
10982{
10983 struct kvm_segment cs;
10984
10985 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
10986 cs.selector = vector << 8;
10987 cs.base = vector << 12;
10988 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
10989 kvm_rip_write(vcpu, 0);
10990}
10991EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
10992
10993int kvm_arch_hardware_enable(void)
10994{
10995 struct kvm *kvm;
10996 struct kvm_vcpu *vcpu;
10997 int i;
10998 int ret;
10999 u64 local_tsc;
11000 u64 max_tsc = 0;
11001 bool stable, backwards_tsc = false;
11002
11003 kvm_user_return_msr_cpu_online();
11004 ret = static_call(kvm_x86_hardware_enable)();
11005 if (ret != 0)
11006 return ret;
11007
11008 local_tsc = rdtsc();
11009 stable = !kvm_check_tsc_unstable();
11010 list_for_each_entry(kvm, &vm_list, vm_list) {
11011 kvm_for_each_vcpu(i, vcpu, kvm) {
11012 if (!stable && vcpu->cpu == smp_processor_id())
11013 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
11014 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
11015 backwards_tsc = true;
11016 if (vcpu->arch.last_host_tsc > max_tsc)
11017 max_tsc = vcpu->arch.last_host_tsc;
11018 }
11019 }
11020 }
11021
 /*
  * Sometimes, even reliable TSCs go backwards.  This happens on platforms
  * that reset the TSC during suspend or hibernate actions, but maintain
  * synchronization.  We must compensate.  Fortunately, we can detect that
  * condition here, which happens early in CPU bringup, before any KVM
  * threads can be running.  Unfortunately, we can't bring the TSCs fully
  * up to date with real time, as we aren't yet far enough into CPU bringup
  * to know how much real time has actually elapsed; the boottime helpers
  * would be using boot variables that haven't been updated yet.
  *
  * So we simply find the maximum observed TSC above, then record the
  * adjustment to TSC in each vCPU.  When the vCPU later gets loaded, the
  * adjustment will be applied.  Note that we accumulate adjustments, in
  * case multiple suspend cycles happen before some vCPU gets a chance to
  * run again.
  */
11060 if (backwards_tsc) {
11061 u64 delta_cyc = max_tsc - local_tsc;
11062 list_for_each_entry(kvm, &vm_list, vm_list) {
11063 kvm->arch.backwards_tsc_observed = true;
11064 kvm_for_each_vcpu(i, vcpu, kvm) {
11065 vcpu->arch.tsc_offset_adjustment += delta_cyc;
11066 vcpu->arch.last_host_tsc = local_tsc;
11067 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
11068 }
11069
 /*
  * Also clear the saved TSC write/time pair so that stale values are not
  * used for TSC offset matching after the host TSC went backwards.
  */
11076 kvm->arch.last_tsc_nsec = 0;
11077 kvm->arch.last_tsc_write = 0;
11078 }
11079
11080 }
11081 return 0;
11082}
11083
11084void kvm_arch_hardware_disable(void)
11085{
11086 static_call(kvm_x86_hardware_disable)();
11087 drop_user_return_notifiers();
11088}
11089
11090int kvm_arch_hardware_setup(void *opaque)
11091{
11092 struct kvm_x86_init_ops *ops = opaque;
11093 int r;
11094
11095 rdmsrl_safe(MSR_EFER, &host_efer);
11096
11097 if (boot_cpu_has(X86_FEATURE_XSAVES))
11098 rdmsrl(MSR_IA32_XSS, host_xss);
11099
11100 r = ops->hardware_setup();
11101 if (r != 0)
11102 return r;
11103
11104 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
11105 kvm_ops_static_call_update();
11106
11107 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
11108 supported_xss = 0;
11109
11110#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
11111 cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
11112#undef __kvm_cpu_cap_has
11113
11114 if (kvm_has_tsc_control) {
 /*
  * Make sure the user can only configure tsc_khz values that fit into a
  * signed integer.  A min value is not calculated because it will always
  * be 1 on all machines.
  */
11121 u64 max = min(0x7fffffffULL,
11122 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
11123 kvm_max_guest_tsc_khz = max;
11124
11125 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
11126 }
11127
11128 kvm_init_msr_list();
11129 return 0;
11130}
11131
11132void kvm_arch_hardware_unsetup(void)
11133{
11134 static_call(kvm_x86_hardware_unsetup)();
11135}
11136
11137int kvm_arch_check_processor_compat(void *opaque)
11138{
11139 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
11140 struct kvm_x86_init_ops *ops = opaque;
11141
11142 WARN_ON(!irqs_disabled());
11143
11144 if (__cr4_reserved_bits(cpu_has, c) !=
11145 __cr4_reserved_bits(cpu_has, &boot_cpu_data))
11146 return -EIO;
11147
11148 return ops->check_processor_compatibility();
11149}
11150
11151bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
11152{
11153 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
11154}
11155EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
11156
11157bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
11158{
11159 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
11160}
11161
11162__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
11163EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
11164
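/*
 * Notification that the vCPU is being scheduled in on @cpu: mark that an L1D
 * flush is needed before the next VM-entry (L1TF mitigation) and, if the
 * guest PMU has stale events, defer their cleanup to the next entry via
 * KVM_REQ_PMU.
 */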
11165void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
11166{
11167 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
11168
11169 vcpu->arch.l1tf_flush_l1d = true;
11170 if (pmu->version && unlikely(pmu->event_count)) {
11171 pmu->need_cleanup = true;
11172 kvm_make_request(KVM_REQ_PMU, vcpu);
11173 }
11174 static_call(kvm_x86_sched_in)(vcpu, cpu);
11175}
11176
11177void kvm_arch_free_vm(struct kvm *kvm)
11178{
11179 kfree(to_kvm_hv(kvm)->hv_pa_pg);
11180 vfree(kvm);
11181}
11182
11183
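/*
 * Arch-specific VM initialization.  Page tracking is initialized first (the
 * MMU registers a page-track notifier later), followed by the lists, locks
 * and clock state, and finally the APICv, Hyper-V, MMU, Xen and
 * vendor-specific (VMX/SVM) state.
 */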
11184int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
11185{
11186 int ret;
11187
11188 if (type)
11189 return -EINVAL;
11190
11191 ret = kvm_page_track_init(kvm);
11192 if (ret)
11193 return ret;
11194
11195 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
11196 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
11197 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
11198 INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
11199 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
11200 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
11201
 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
11203 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
11205 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
11206 &kvm->arch.irq_sources_bitmap);
11207
11208 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
11209 mutex_init(&kvm->arch.apic_map_lock);
11210 raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
11211
11212 kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
11213 pvclock_update_vm_gtod_copy(kvm);
11214
11215 kvm->arch.guest_can_read_msr_platform_info = true;
11216
11217#if IS_ENABLED(CONFIG_HYPERV)
11218 spin_lock_init(&kvm->arch.hv_root_tdp_lock);
11219 kvm->arch.hv_root_tdp = INVALID_PAGE;
11220#endif
11221
11222 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
11223 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
11224
11225 kvm_apicv_init(kvm);
11226 kvm_hv_init_vm(kvm);
11227 kvm_mmu_init_vm(kvm);
11228 kvm_xen_init_vm(kvm);
11229
11230 return static_call(kvm_x86_vm_init)(kvm);
11231}
11232
11233int kvm_arch_post_init_vm(struct kvm *kvm)
11234{
11235 return kvm_mmu_post_init_vm(kvm);
11236}
11237
11238static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
11239{
11240 vcpu_load(vcpu);
11241 kvm_mmu_unload(vcpu);
11242 vcpu_put(vcpu);
11243}
11244
11245static void kvm_free_vcpus(struct kvm *kvm)
11246{
11247 unsigned int i;
11248 struct kvm_vcpu *vcpu;
11249
 /*
  * Unpin any mmu pages first.
  */
11253 kvm_for_each_vcpu(i, vcpu, kvm) {
11254 kvm_clear_async_pf_completion_queue(vcpu);
11255 kvm_unload_vcpu_mmu(vcpu);
11256 }
11257 kvm_for_each_vcpu(i, vcpu, kvm)
11258 kvm_vcpu_destroy(vcpu);
11259
11260 mutex_lock(&kvm->lock);
11261 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
11262 kvm->vcpus[i] = NULL;
11263
11264 atomic_set(&kvm->online_vcpus, 0);
11265 mutex_unlock(&kvm->lock);
11266}
11267
11268void kvm_arch_sync_events(struct kvm *kvm)
11269{
11270 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
11271 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
11272 kvm_free_pit(kvm);
11273}
11274
11275#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
11276
/**
 * __x86_set_memory_region: Setup KVM internal memory slot
 *
 * @kvm: the kvm pointer to the VM.
 * @id: the id of the memslot.
 * @gpa: the gpa to install the slot (unused when @size == 0).
 * @size: the size of the slot. Set to zero to uninstall a slot.
 *
 * This function helps to setup a KVM internal memory slot.  Specify
 * @size > 0 to install a new slot, while @size == 0 to uninstall a
 * slot.  The return code can be one of the following:
 *
 *   HVA:           on success (uninstall will return a bogus HVA)
 *   -errno:        on error
 *
 * The caller should always use IS_ERR() to check the return value
 * before use.  Note, the KVM internal memory slots are guaranteed to
 * remain valid and unchanged until the VM is destroyed, i.e., the
 * GPA->HVA translation will not change.  However, the HVA is a user
 * address, i.e. its accessibility is not guaranteed, and must be
 * accessed via __copy_{to,from}_user().
 */
11299void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
11300 u32 size)
11301{
11302 int i, r;
11303 unsigned long hva, old_npages;
11304 struct kvm_memslots *slots = kvm_memslots(kvm);
11305 struct kvm_memory_slot *slot;
11306
 /* Called with kvm->slots_lock held.  */
11308 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
11309 return ERR_PTR_USR(-EINVAL);
11310
11311 slot = id_to_memslot(slots, id);
11312 if (size) {
11313 if (slot && slot->npages)
11314 return ERR_PTR_USR(-EEXIST);
11315
 /*
  * MAP_SHARED to prevent internal slot pages from being moved by
  * fork()/COW.
  */
11320 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
11321 MAP_SHARED | MAP_ANONYMOUS, 0);
11322 if (IS_ERR((void *)hva))
11323 return (void __user *)hva;
11324 } else {
11325 if (!slot || !slot->npages)
11326 return NULL;
11327
11328 old_npages = slot->npages;
11329 hva = slot->userspace_addr;
11330 }
11331
11332 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
11333 struct kvm_userspace_memory_region m;
11334
11335 m.slot = id | (i << 16);
11336 m.flags = 0;
11337 m.guest_phys_addr = gpa;
11338 m.userspace_addr = hva;
11339 m.memory_size = size;
11340 r = __kvm_set_memory_region(kvm, &m);
11341 if (r < 0)
11342 return ERR_PTR_USR(r);
11343 }
11344
11345 if (!size)
11346 vm_munmap(hva, old_npages * PAGE_SIZE);
11347
11348 return (void __user *)hva;
11349}
11350EXPORT_SYMBOL_GPL(__x86_set_memory_region);
11351
11352void kvm_arch_pre_destroy_vm(struct kvm *kvm)
11353{
11354 kvm_mmu_pre_destroy_vm(kvm);
11355}
11356
11357void kvm_arch_destroy_vm(struct kvm *kvm)
11358{
11359 if (current->mm == kvm->mm) {
 /*
  * Free memory regions allocated on behalf of userspace, unless the
  * memory map has changed due to process exit or fd copying.
  */
11365 mutex_lock(&kvm->slots_lock);
11366 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
11367 0, 0);
11368 __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
11369 0, 0);
11370 __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
11371 mutex_unlock(&kvm->slots_lock);
11372 }
11373 static_call_cond(kvm_x86_vm_destroy)(kvm);
11374 kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
11375 kvm_pic_destroy(kvm);
11376 kvm_ioapic_destroy(kvm);
11377 kvm_free_vcpus(kvm);
11378 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
11379 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
11380 kvm_mmu_uninit_vm(kvm);
11381 kvm_page_track_cleanup(kvm);
11382 kvm_xen_destroy_vm(kvm);
11383 kvm_hv_destroy_vm(kvm);
11384}
11385
11386static void memslot_rmap_free(struct kvm_memory_slot *slot)
11387{
11388 int i;
11389
11390 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11391 kvfree(slot->arch.rmap[i]);
11392 slot->arch.rmap[i] = NULL;
11393 }
11394}
11395
11396void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
11397{
11398 int i;
11399
11400 memslot_rmap_free(slot);
11401
11402 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11403 kvfree(slot->arch.lpage_info[i - 1]);
11404 slot->arch.lpage_info[i - 1] = NULL;
11405 }
11406
11407 kvm_page_track_free_memslot(slot);
11408}
11409
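/*
 * Allocate the reverse-map (rmap) arrays for a memslot, one array per page
 * size: index i covers mapping level i + 1 (4K, 2M, 1G), with one entry per
 * potential mapping of that size in the slot.
 */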
11410static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
11411 unsigned long npages)
11412{
11413 const int sz = sizeof(*slot->arch.rmap[0]);
11414 int i;
11415
11416 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11417 int level = i + 1;
11418 int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
11419
11420 if (slot->arch.rmap[i])
11421 continue;
11422
11423 slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
11424 if (!slot->arch.rmap[i]) {
11425 memslot_rmap_free(slot);
11426 return -ENOMEM;
11427 }
11428 }
11429
11430 return 0;
11431}
11432
11433int alloc_all_memslots_rmaps(struct kvm *kvm)
11434{
11435 struct kvm_memslots *slots;
11436 struct kvm_memory_slot *slot;
11437 int r, i;
11438
 /*
  * Check memslots_have_rmaps early and locklessly before taking
  * slots_arch_lock below; if rmaps already exist there is nothing to do.
  */
11443 if (kvm_memslots_have_rmaps(kvm))
11444 return 0;
11445
11446 mutex_lock(&kvm->slots_arch_lock);
11447
 /*
  * Re-check memslots_have_rmaps under slots_arch_lock in case another
  * task allocated the rmaps while this one was waiting for the lock.
  */
11452 if (kvm_memslots_have_rmaps(kvm)) {
11453 mutex_unlock(&kvm->slots_arch_lock);
11454 return 0;
11455 }
11456
11457 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
11458 slots = __kvm_memslots(kvm, i);
11459 kvm_for_each_memslot(slot, slots) {
11460 r = memslot_rmap_alloc(slot, slot->npages);
11461 if (r) {
11462 mutex_unlock(&kvm->slots_arch_lock);
11463 return r;
11464 }
11465 }
11466 }
11467
 /*
  * Ensure that memslots_have_rmaps becomes true strictly after all the
  * rmap pointers are set.
  */
11472 smp_store_release(&kvm->arch.memslots_have_rmaps, true);
11473 mutex_unlock(&kvm->slots_arch_lock);
11474 return 0;
11475}
11476
11477static int kvm_alloc_memslot_metadata(struct kvm *kvm,
11478 struct kvm_memory_slot *slot,
11479 unsigned long npages)
11480{
11481 int i, r;
11482
 /*
  * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
  * old arrays will be freed by __kvm_set_memory_region() if installing
  * the new memslot is successful.
  */
11488 memset(&slot->arch, 0, sizeof(slot->arch));
11489
11490 if (kvm_memslots_have_rmaps(kvm)) {
11491 r = memslot_rmap_alloc(slot, npages);
11492 if (r)
11493 return r;
11494 }
11495
11496 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11497 struct kvm_lpage_info *linfo;
11498 unsigned long ugfn;
11499 int lpages;
11500 int level = i + 1;
11501
11502 lpages = __kvm_mmu_slot_lpages(slot, npages, level);
11503
11504 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
11505 if (!linfo)
11506 goto out_free;
11507
11508 slot->arch.lpage_info[i - 1] = linfo;
11509
11510 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
11511 linfo[0].disallow_lpage = 1;
11512 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
11513 linfo[lpages - 1].disallow_lpage = 1;
11514 ugfn = slot->userspace_addr >> PAGE_SHIFT;
11515
 /*
  * If the gfn and userspace address are not aligned wrt each other,
  * disable large page support for this slot.
  */
11519 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
11520 unsigned long j;
11521
11522 for (j = 0; j < lpages; ++j)
11523 linfo[j].disallow_lpage = 1;
11524 }
11525 }
11526
11527 if (kvm_page_track_create_memslot(slot, npages))
11528 goto out_free;
11529
11530 return 0;
11531
11532out_free:
11533 memslot_rmap_free(slot);
11534
11535 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11536 kvfree(slot->arch.lpage_info[i - 1]);
11537 slot->arch.lpage_info[i - 1] = NULL;
11538 }
11539 return -ENOMEM;
11540}
11541
11542void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
11543{
11544 struct kvm_vcpu *vcpu;
11545 int i;
11546
 /*
  * memslots->generation has been incremented.
  * mmio generation may have reached its maximum value.
  */
11551 kvm_mmu_invalidate_mmio_sptes(kvm, gen);
11552
 /* Force re-initialization of steal_time cache */
11554 kvm_for_each_vcpu(i, vcpu, kvm)
11555 kvm_vcpu_kick(vcpu);
11556}
11557
11558int kvm_arch_prepare_memory_region(struct kvm *kvm,
11559 struct kvm_memory_slot *memslot,
11560 const struct kvm_userspace_memory_region *mem,
11561 enum kvm_mr_change change)
11562{
11563 if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
11564 return kvm_alloc_memslot_metadata(kvm, memslot,
11565 mem->memory_size >> PAGE_SHIFT);
11566 return 0;
11567}
11568
11569
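/*
 * Track how many memslots currently have dirty logging enabled.  When the
 * count transitions between zero and non-zero, every vCPU is asked to update
 * its CPU-based dirty logging state (e.g. PML on Intel) via
 * KVM_REQ_UPDATE_CPU_DIRTY_LOGGING.
 */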
11570static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
11571{
11572 struct kvm_arch *ka = &kvm->arch;
11573
11574 if (!kvm_x86_ops.cpu_dirty_log_size)
11575 return;
11576
11577 if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
11578 (!enable && --ka->cpu_dirty_logging_count == 0))
11579 kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
11580
11581 WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
11582}
11583
11584static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
11585 struct kvm_memory_slot *old,
11586 const struct kvm_memory_slot *new,
11587 enum kvm_mr_change change)
11588{
11589 bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
11590
 /*
  * Update CPU dirty logging if dirty logging is being toggled.  This
  * applies to all operations.
  */
11595 if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
11596 kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
11597
 /*
  * Nothing more to do for RO slots (which can't be dirtied and can't be
  * made writable) or CREATE/MOVE/DELETE of a slot.
  *
  * For a memslot with dirty logging disabled:
  * CREATE:      No dirty mappings will already exist.
  * MOVE/DELETE: The old mappings will already have been cleaned up by
  *              kvm_arch_flush_shadow_memslot().
  *
  * For a memslot with dirty logging enabled:
  * CREATE:      No shadow pages exist, thus nothing to write-protect
  *              and no dirty bits to clear.
  * MOVE/DELETE: The old mappings will already have been cleaned up by
  *              kvm_arch_flush_shadow_memslot().
  */
11613 if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
11614 return;
11615
 /*
  * READONLY and non-flags changes were filtered out above, and the only
  * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
  * logging isn't being toggled on or off.
  */
11621 if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
11622 return;
11623
11624 if (!log_dirty_pages) {
 /*
  * Dirty logging tracks sptes in 4k granularity, meaning that large
  * sptes have to be split.  If live migration succeeds, the guest in the
  * source machine will be destroyed and large sptes will be created in
  * the destination.  However, if the guest continues to run in the
  * source machine (for example if live migration fails), small sptes
  * will remain around and cause bad performance.
  *
  * Scan sptes if dirty logging has been stopped, dropping those which
  * can be collapsed into a single large-page spte.  Later page faults
  * will create the large-page sptes.
  */
11638 kvm_mmu_zap_collapsible_sptes(kvm, new);
11639 } else {
 /*
  * Initially-all-set does not require write protecting any page,
  * because they're all assumed to be dirty.
  */
11644 if (kvm_dirty_log_manual_protect_and_init_set(kvm))
11645 return;
11646
11647 if (kvm_x86_ops.cpu_dirty_log_size) {
11648 kvm_mmu_slot_leaf_clear_dirty(kvm, new);
11649 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
11650 } else {
11651 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
11652 }
11653 }
11654}
11655
11656void kvm_arch_commit_memory_region(struct kvm *kvm,
11657 const struct kvm_userspace_memory_region *mem,
11658 struct kvm_memory_slot *old,
11659 const struct kvm_memory_slot *new,
11660 enum kvm_mr_change change)
11661{
11662 if (!kvm->arch.n_requested_mmu_pages)
11663 kvm_mmu_change_mmu_pages(kvm,
11664 kvm_mmu_calculate_default_mmu_pages(kvm));
11665
11666 kvm_mmu_slot_apply_flags(kvm, old, new, change);
11667
 /* Free the arrays associated with the old memslot. */
11669 if (change == KVM_MR_MOVE)
11670 kvm_arch_free_memslot(kvm, old);
11671}
11672
11673void kvm_arch_flush_shadow_all(struct kvm *kvm)
11674{
11675 kvm_mmu_zap_all(kvm);
11676}
11677
11678void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
11679 struct kvm_memory_slot *slot)
11680{
11681 kvm_page_track_flush_slot(kvm, slot);
11682}
11683
11684static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
11685{
11686 return (is_guest_mode(vcpu) &&
11687 kvm_x86_ops.guest_apic_has_interrupt &&
11688 static_call(kvm_x86_guest_apic_has_interrupt)(vcpu));
11689}
11690
11691static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
11692{
11693 if (!list_empty_careful(&vcpu->async_pf.done))
11694 return true;
11695
11696 if (kvm_apic_has_events(vcpu))
11697 return true;
11698
11699 if (vcpu->arch.pv.pv_unhalted)
11700 return true;
11701
11702 if (vcpu->arch.exception.pending)
11703 return true;
11704
11705 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
11706 (vcpu->arch.nmi_pending &&
11707 static_call(kvm_x86_nmi_allowed)(vcpu, false)))
11708 return true;
11709
11710 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
11711 (vcpu->arch.smi_pending &&
11712 static_call(kvm_x86_smi_allowed)(vcpu, false)))
11713 return true;
11714
11715 if (kvm_arch_interrupt_allowed(vcpu) &&
11716 (kvm_cpu_has_interrupt(vcpu) ||
11717 kvm_guest_apic_has_interrupt(vcpu)))
11718 return true;
11719
11720 if (kvm_hv_has_stimer_pending(vcpu))
11721 return true;
11722
11723 if (is_guest_mode(vcpu) &&
11724 kvm_x86_ops.nested_ops->hv_timer_pending &&
11725 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
11726 return true;
11727
11728 return false;
11729}
11730
11731int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
11732{
11733 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
11734}
11735
11736bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
11737{
11738 if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
11739 return true;
11740
11741 return false;
11742}
11743
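/*
 * Cheap "might be runnable" check used when picking a directed-yield target.
 * Unlike kvm_arch_vcpu_runnable() this avoids heavyweight checks; a stale
 * answer only makes the yield heuristic slightly less accurate.
 */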
11744bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
11745{
11746 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
11747 return true;
11748
11749 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
11750 kvm_test_request(KVM_REQ_SMI, vcpu) ||
11751 kvm_test_request(KVM_REQ_EVENT, vcpu))
11752 return true;
11753
11754 return kvm_arch_dy_has_pending_interrupt(vcpu);
11755}
11756
11757bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
11758{
11759 if (vcpu->arch.guest_state_protected)
11760 return true;
11761
11762 return vcpu->arch.preempted_in_kernel;
11763}
11764
11765int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
11766{
11767 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
11768}
11769
11770int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
11771{
11772 return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
11773}
11774
11775unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
11776{
 /* Can't read the RIP when guest state is protected, just return 0 */
11778 if (vcpu->arch.guest_state_protected)
11779 return 0;
11780
11781 if (is_64_bit_mode(vcpu))
11782 return kvm_rip_read(vcpu);
11783 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
11784 kvm_rip_read(vcpu));
11785}
11786EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
11787
11788bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
11789{
11790 return kvm_get_linear_rip(vcpu) == linear_rip;
11791}
11792EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
11793
11794unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
11795{
11796 unsigned long rflags;
11797
11798 rflags = static_call(kvm_x86_get_rflags)(vcpu);
11799 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
11800 rflags &= ~X86_EFLAGS_TF;
11801 return rflags;
11802}
11803EXPORT_SYMBOL_GPL(kvm_get_rflags);
11804
11805static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
11806{
11807 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
11808 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
11809 rflags |= X86_EFLAGS_TF;
11810 static_call(kvm_x86_set_rflags)(vcpu, rflags);
11811}
11812
11813void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
11814{
11815 __kvm_set_rflags(vcpu, rflags);
11816 kvm_make_request(KVM_REQ_EVENT, vcpu);
11817}
11818EXPORT_SYMBOL_GPL(kvm_set_rflags);
11819
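/*
 * Called when an asynchronously faulted-in page is ready: if the vCPU is
 * still using the same paging mode and guest CR3 that produced the original
 * fault, prefault the GPA so the guest doesn't fault again on resume.
 */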
11820void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
11821{
11822 int r;
11823
11824 if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
11825 work->wakeup_all)
11826 return;
11827
11828 r = kvm_mmu_reload(vcpu);
11829 if (unlikely(r))
11830 return;
11831
11832 if (!vcpu->arch.mmu->direct_map &&
11833 work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
11834 return;
11835
11836 kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
11837}
11838
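/*
 * The async page fault gfn table is a small, fixed-size, open-addressed hash
 * (linear probing) of the gfns with an outstanding async fault.  An entry of
 * ~0 means "empty"; deletion below re-packs the probe chain so lookups stay
 * correct.
 */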
11839static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
11840{
11841 BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
11842
11843 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
11844}
11845
11846static inline u32 kvm_async_pf_next_probe(u32 key)
11847{
11848 return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
11849}
11850
11851static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
11852{
11853 u32 key = kvm_async_pf_hash_fn(gfn);
11854
11855 while (vcpu->arch.apf.gfns[key] != ~0)
11856 key = kvm_async_pf_next_probe(key);
11857
11858 vcpu->arch.apf.gfns[key] = gfn;
11859}
11860
11861static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
11862{
11863 int i;
11864 u32 key = kvm_async_pf_hash_fn(gfn);
11865
11866 for (i = 0; i < ASYNC_PF_PER_VCPU &&
11867 (vcpu->arch.apf.gfns[key] != gfn &&
11868 vcpu->arch.apf.gfns[key] != ~0); i++)
11869 key = kvm_async_pf_next_probe(key);
11870
11871 return key;
11872}
11873
11874bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
11875{
11876 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
11877}
11878
11879static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
11880{
11881 u32 i, j, k;
11882
11883 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
11884
11885 if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
11886 return;
11887
11888 while (true) {
11889 vcpu->arch.apf.gfns[i] = ~0;
11890 do {
11891 j = kvm_async_pf_next_probe(j);
11892 if (vcpu->arch.apf.gfns[j] == ~0)
11893 return;
11894 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
11895
 /*
  * k lies cyclically in ]i,j]
  * |    i.k.j |
  * |....j i.k.| or  |.k..j i...|
  */
11900 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
11901 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
11902 i = j;
11903 }
11904}
11905
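/*
 * The guest/host async-PF protocol uses the shared kvm_vcpu_pv_apf_data
 * area: the "page not present" indication goes into the flags word at the
 * start of the struct, the "page ready" token into the token field.  A
 * non-zero token means the guest has not yet consumed the previous
 * "page ready" notification.
 */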
11906static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
11907{
11908 u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
11909
11910 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
11911 sizeof(reason));
11912}
11913
11914static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
11915{
11916 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
11917
11918 return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
11919 &token, offset, sizeof(token));
11920}
11921
11922static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
11923{
11924 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
11925 u32 val;
11926
11927 if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
11928 &val, offset, sizeof(val)))
11929 return false;
11930
11931 return !val;
11932}
11933
11934static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
11935{
11936 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
11937 return false;
11938
11939 if (!kvm_pv_async_pf_enabled(vcpu) ||
11940 (vcpu->arch.apf.send_user_only && static_call(kvm_x86_get_cpl)(vcpu) == 0))
11941 return false;
11942
11943 return true;
11944}
11945
11946bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
11947{
11948 if (unlikely(!lapic_in_kernel(vcpu) ||
11949 kvm_event_needs_reinjection(vcpu) ||
11950 vcpu->arch.exception.pending))
11951 return false;
11952
11953 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
11954 return false;
11955
 /*
  * If interrupts are off we cannot even use an artificial halt state.
  */
11960 return kvm_arch_interrupt_allowed(vcpu);
11961}
11962
11963bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
11964 struct kvm_async_pf *work)
11965{
11966 struct x86_exception fault;
11967
11968 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
11969 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
11970
11971 if (kvm_can_deliver_async_pf(vcpu) &&
11972 !apf_put_user_notpresent(vcpu)) {
11973 fault.vector = PF_VECTOR;
11974 fault.error_code_valid = true;
11975 fault.error_code = 0;
11976 fault.nested_page_fault = false;
11977 fault.address = work->arch.token;
11978 fault.async_page_fault = true;
11979 kvm_inject_page_fault(vcpu, &fault);
11980 return true;
11981 } else {
 /*
  * It is not possible to deliver a paravirtualized asynchronous page
  * fault, but putting the guest in an artificial halt state can be
  * beneficial nevertheless: if an interrupt arrives, we can deliver it
  * timely and perhaps the guest will schedule another process.  When the
  * instruction that triggered the page fault is retried, hopefully the
  * page will be ready in the host.
  */
11990 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
11991 return false;
11992 }
11993}
11994
11995void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
11996 struct kvm_async_pf *work)
11997{
11998 struct kvm_lapic_irq irq = {
11999 .delivery_mode = APIC_DM_FIXED,
12000 .vector = vcpu->arch.apf.vec
12001 };
12002
12003 if (work->wakeup_all)
12004 work->arch.token = ~0;
12005 else
12006 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
12007 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
12008
12009 if ((work->wakeup_all || work->notpresent_injected) &&
12010 kvm_pv_async_pf_enabled(vcpu) &&
12011 !apf_put_user_ready(vcpu, work->arch.token)) {
12012 vcpu->arch.apf.pageready_pending = true;
12013 kvm_apic_set_irq(vcpu, &irq, NULL);
12014 }
12015
12016 vcpu->arch.apf.halted = false;
12017 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
12018}
12019
12020void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
12021{
12022 kvm_make_request(KVM_REQ_APF_READY, vcpu);
12023 if (!vcpu->arch.apf.pageready_pending)
12024 kvm_vcpu_kick(vcpu);
12025}
12026
12027bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
12028{
12029 if (!kvm_pv_async_pf_enabled(vcpu))
12030 return true;
12031 else
12032 return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu);
12033}
12034
12035void kvm_arch_start_assignment(struct kvm *kvm)
12036{
12037 if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
12038 static_call_cond(kvm_x86_start_assignment)(kvm);
12039}
12040EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
12041
12042void kvm_arch_end_assignment(struct kvm *kvm)
12043{
12044 atomic_dec(&kvm->arch.assigned_device_count);
12045}
12046EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
12047
12048bool kvm_arch_has_assigned_device(struct kvm *kvm)
12049{
12050 return atomic_read(&kvm->arch.assigned_device_count);
12051}
12052EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
12053
12054void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
12055{
12056 atomic_inc(&kvm->arch.noncoherent_dma_count);
12057}
12058EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
12059
12060void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
12061{
12062 atomic_dec(&kvm->arch.noncoherent_dma_count);
12063}
12064EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
12065
12066bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
12067{
12068 return atomic_read(&kvm->arch.noncoherent_dma_count);
12069}
12070EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
12071
12072bool kvm_arch_has_irq_bypass(void)
12073{
12074 return true;
12075}
12076
12077int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
12078 struct irq_bypass_producer *prod)
12079{
12080 struct kvm_kernel_irqfd *irqfd =
12081 container_of(cons, struct kvm_kernel_irqfd, consumer);
12082 int ret;
12083
12084 irqfd->producer = prod;
12085 kvm_arch_start_assignment(irqfd->kvm);
12086 ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm,
12087 prod->irq, irqfd->gsi, 1);
12088
12089 if (ret)
12090 kvm_arch_end_assignment(irqfd->kvm);
12091
12092 return ret;
12093}
12094
12095void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
12096 struct irq_bypass_producer *prod)
12097{
12098 int ret;
12099 struct kvm_kernel_irqfd *irqfd =
12100 container_of(cons, struct kvm_kernel_irqfd, consumer);
12101
12102 WARN_ON(irqfd->producer != prod);
12103 irqfd->producer = NULL;
12104
 /*
  * When the producer is unregistered, fall back to non-posted (remapped)
  * interrupt delivery for this GSI; the remapped path already handles the
  * cases where the interrupt is masked/disabled or KVM does not want to
  * receive the events.
  */
12111 ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0);
12112 if (ret)
12113 printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
12114 " fails: %d\n", irqfd->consumer.token, ret);
12115
12116 kvm_arch_end_assignment(irqfd->kvm);
12117}
12118
12119int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
12120 uint32_t guest_irq, bool set)
12121{
12122 return static_call(kvm_x86_update_pi_irte)(kvm, host_irq, guest_irq, set);
12123}
12124
12125bool kvm_vector_hashing_enabled(void)
12126{
12127 return vector_hashing;
12128}
12129
12130bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
12131{
12132 return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
12133}
12134EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
12135
12136
12137int kvm_spec_ctrl_test_value(u64 value)
12138{
 /*
  * Test that setting IA32_SPEC_CTRL to the given value is allowed by the
  * host processor; returns non-zero if the write faults.
  */

12144 u64 saved_value;
12145 unsigned long flags;
12146 int ret = 0;
12147
12148 local_irq_save(flags);
12149
12150 if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
12151 ret = 1;
12152 else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
12153 ret = 1;
12154 else
12155 wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
12156
12157 local_irq_restore(flags);
12158
12159 return ret;
12160}
12161EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
12162
12163void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
12164{
12165 struct x86_exception fault;
12166 u32 access = error_code &
12167 (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
12168
12169 if (!(error_code & PFERR_PRESENT_MASK) ||
12170 vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
 /*
  * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the guest's page tables
  * probably do not match the TLB contents that produced the fault.  Build
  * the page fault from the error code and GVA exactly as provided;
  * otherwise the fault filled in by the failed walk is injected as-is.
  */
12176 fault.vector = PF_VECTOR;
12177 fault.error_code_valid = true;
12178 fault.error_code = error_code;
12179 fault.nested_page_fault = false;
12180 fault.address = gva;
12181 }
12182 vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
12183}
12184EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
12185
/*
 * Handles kvm_read/write_guest_virt*() result and either injects #PF or
 * returns KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM.
 * The return value indicates whether an exit to userspace is needed.
 */
12191int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
12192 struct x86_exception *e)
12193{
12194 if (r == X86EMUL_PROPAGATE_FAULT) {
12195 kvm_inject_emulated_page_fault(vcpu, e);
12196 return 1;
12197 }
12198
 /*
  * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED
  * while handling a VMX instruction KVM could have handled the request
  * correctly by exiting to userspace and performing I/O but there doesn't
  * seem to be a real use-case behind such requests, just return
  * KVM_EXIT_INTERNAL_ERROR for now.
  */
12206 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
12207 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
12208 vcpu->run->internal.ndata = 0;
12209
12210 return 0;
12211}
12212EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
12213
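/*
 * Common INVPCID emulation for VMX and SVM: fetch the 16-byte descriptor
 * (PCID + linear address) from guest memory, validate it, and perform the
 * requested invalidation.  Reserved PCID bits or a non-canonical address
 * raise #GP; the "all contexts" types fall back to a full guest TLB flush.
 */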
12214int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
12215{
12216 bool pcid_enabled;
12217 struct x86_exception e;
12218 struct {
12219 u64 pcid;
12220 u64 gla;
12221 } operand;
12222 int r;
12223
12224 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
12225 if (r != X86EMUL_CONTINUE)
12226 return kvm_handle_memory_failure(vcpu, r, &e);
12227
12228 if (operand.pcid >> 12 != 0) {
12229 kvm_inject_gp(vcpu, 0);
12230 return 1;
12231 }
12232
12233 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
12234
12235 switch (type) {
12236 case INVPCID_TYPE_INDIV_ADDR:
12237 if ((!pcid_enabled && (operand.pcid != 0)) ||
12238 is_noncanonical_address(operand.gla, vcpu)) {
12239 kvm_inject_gp(vcpu, 0);
12240 return 1;
12241 }
12242 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
12243 return kvm_skip_emulated_instruction(vcpu);
12244
12245 case INVPCID_TYPE_SINGLE_CTXT:
12246 if (!pcid_enabled && (operand.pcid != 0)) {
12247 kvm_inject_gp(vcpu, 0);
12248 return 1;
12249 }
12250
12251 kvm_invalidate_pcid(vcpu, operand.pcid);
12252 return kvm_skip_emulated_instruction(vcpu);
12253
12254 case INVPCID_TYPE_ALL_NON_GLOBAL:
 /*
  * Currently, KVM doesn't mark global entries in the shadow page tables,
  * so a non-global flush just degenerates to a global flush.  If needed,
  * we could optimize this later by keeping track of global entries in
  * shadow page tables.
  */
12262 fallthrough;
12263 case INVPCID_TYPE_ALL_INCL_GLOBAL:
12264 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
12265 return kvm_skip_emulated_instruction(vcpu);
12266
12267 default:
12268 BUG();
12269 }
12270}
12271EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
12272
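/*
 * MMIO emulation for SEV-ES guests.  KVM cannot read the guest's registers
 * or decode the faulting instruction, so the vendor code supplies a data
 * buffer and length (populated via the GHCB by the guest's #VC handler).
 * The transfer is split into fragments of at most 8 bytes, each completed
 * through a KVM_EXIT_MMIO round trip to userspace using the callback below.
 */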
12273static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
12274{
12275 struct kvm_run *run = vcpu->run;
12276 struct kvm_mmio_fragment *frag;
12277 unsigned int len;
12278
12279 BUG_ON(!vcpu->mmio_needed);
12280
12281
12282 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
12283 len = min(8u, frag->len);
12284 if (!vcpu->mmio_is_write)
12285 memcpy(frag->data, run->mmio.data, len);
12286
12287 if (frag->len <= 8) {
12288
12289 frag++;
12290 vcpu->mmio_cur_fragment++;
12291 } else {
12292
12293 frag->data += len;
12294 frag->gpa += len;
12295 frag->len -= len;
12296 }
12297
12298 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
12299 vcpu->mmio_needed = 0;

 /* All fragments consumed; RIP has already been advanced for SEV-ES. */
12303 return 1;
12304 }
12305
12306
12307 run->mmio.phys_addr = frag->gpa;
12308 run->mmio.len = min(8u, frag->len);
12309 run->mmio.is_write = vcpu->mmio_is_write;
12310 if (run->mmio.is_write)
12311 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
12312 run->exit_reason = KVM_EXIT_MMIO;
12313
12314 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12315
12316 return 0;
12317}
12318
12319int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
12320 void *data)
12321{
12322 int handled;
12323 struct kvm_mmio_fragment *frag;
12324
12325 if (!data)
12326 return -EINVAL;
12327
12328 handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
12329 if (handled == bytes)
12330 return 1;
12331
12332 bytes -= handled;
12333 gpa += handled;
12334 data += handled;
12335
12336
12337 frag = vcpu->mmio_fragments;
12338 vcpu->mmio_nr_fragments = 1;
12339 frag->len = bytes;
12340 frag->gpa = gpa;
12341 frag->data = data;
12342
12343 vcpu->mmio_needed = 1;
12344 vcpu->mmio_cur_fragment = 0;
12345
12346 vcpu->run->mmio.phys_addr = gpa;
12347 vcpu->run->mmio.len = min(8u, frag->len);
12348 vcpu->run->mmio.is_write = 1;
12349 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
12350 vcpu->run->exit_reason = KVM_EXIT_MMIO;
12351
12352 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12353
12354 return 0;
12355}
12356EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
12357
12358int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
12359 void *data)
12360{
12361 int handled;
12362 struct kvm_mmio_fragment *frag;
12363
12364 if (!data)
12365 return -EINVAL;
12366
12367 handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
12368 if (handled == bytes)
12369 return 1;
12370
12371 bytes -= handled;
12372 gpa += handled;
12373 data += handled;
12374
12375
12376 frag = vcpu->mmio_fragments;
12377 vcpu->mmio_nr_fragments = 1;
12378 frag->len = bytes;
12379 frag->gpa = gpa;
12380 frag->data = data;
12381
12382 vcpu->mmio_needed = 1;
12383 vcpu->mmio_cur_fragment = 0;
12384
12385 vcpu->run->mmio.phys_addr = gpa;
12386 vcpu->run->mmio.len = min(8u, frag->len);
12387 vcpu->run->mmio.is_write = 0;
12388 vcpu->run->exit_reason = KVM_EXIT_MMIO;
12389
12390 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12391
12392 return 0;
12393}
12394EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
12395
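/*
 * String I/O (INS/OUTS) emulation for SEV-ES guests: the data buffer comes
 * from the vendor code, and the transfer is chopped into chunks of at most
 * PAGE_SIZE / size items.  A return value of 1 means the whole transfer was
 * handled in the kernel; 0 means userspace I/O is required and the matching
 * complete_* callback continues the transfer afterwards.
 */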
12396static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
12397 unsigned int port);
12398
12399static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
12400{
12401 int size = vcpu->arch.pio.size;
12402 int port = vcpu->arch.pio.port;
12403
12404 vcpu->arch.pio.count = 0;
12405 if (vcpu->arch.sev_pio_count)
12406 return kvm_sev_es_outs(vcpu, size, port);
12407 return 1;
12408}
12409
12410static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
12411 unsigned int port)
12412{
12413 for (;;) {
12414 unsigned int count =
12415 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
12416 int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
12417
12418
12419 vcpu->arch.sev_pio_count -= count;
12420 vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
12421 if (!ret)
12422 break;
12423
12424
12425 if (!vcpu->arch.sev_pio_count)
12426 return 1;
12427 }
12428
12429 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
12430 return 0;
12431}
12432
12433static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
12434 unsigned int port);
12435
12436static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
12437{
12438 unsigned count = vcpu->arch.pio.count;
12439 complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
12440 vcpu->arch.sev_pio_count -= count;
12441 vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
12442}
12443
12444static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
12445{
12446 int size = vcpu->arch.pio.size;
12447 int port = vcpu->arch.pio.port;
12448
12449 advance_sev_es_emulated_ins(vcpu);
12450 if (vcpu->arch.sev_pio_count)
12451 return kvm_sev_es_ins(vcpu, size, port);
12452 return 1;
12453}
12454
12455static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
12456 unsigned int port)
12457{
12458 for (;;) {
12459 unsigned int count =
12460 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
12461 if (!__emulator_pio_in(vcpu, size, port, count))
12462 break;
12463
12464
12465 advance_sev_es_emulated_ins(vcpu);
12466 if (!vcpu->arch.sev_pio_count)
12467 return 1;
12468 }
12469
12470 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
12471 return 0;
12472}
12473
12474int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
12475 unsigned int port, void *data, unsigned int count,
12476 int in)
12477{
12478 vcpu->arch.sev_pio_data = data;
12479 vcpu->arch.sev_pio_count = count;
12480 return in ? kvm_sev_es_ins(vcpu, size, port)
12481 : kvm_sev_es_outs(vcpu, size, port);
12482}
12483EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
12484
12485EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
12486EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
12487EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
12488EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
12489EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
12490EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
12491EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
12492EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
12493EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
12494EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
12495EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
12496EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
12497EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
12498EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
12499EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
12500EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
12501EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
12502EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
12503EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
12504EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
12505EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
12506EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
12507EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
12508EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
12509EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
12510EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
12511EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
12512