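/*
 * KVM x86 common code shared by the vendor modules (VMX and SVM): MSR
 * emulation, control and debug register handling, exception queueing,
 * kvmclock and TSC management, and related infrastructure.  (Descriptive
 * summary; the file's original header comment is not part of this section.)
 */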
19#include <linux/kvm_host.h>
20#include "irq.h"
21#include "ioapic.h"
22#include "mmu.h"
23#include "i8254.h"
24#include "tss.h"
25#include "kvm_cache_regs.h"
26#include "kvm_emulate.h"
27#include "x86.h"
28#include "cpuid.h"
29#include "pmu.h"
30#include "hyperv.h"
31#include "lapic.h"
32#include "xen.h"
33
34#include <linux/clocksource.h>
35#include <linux/interrupt.h>
36#include <linux/kvm.h>
37#include <linux/fs.h>
38#include <linux/vmalloc.h>
39#include <linux/export.h>
40#include <linux/moduleparam.h>
41#include <linux/mman.h>
42#include <linux/highmem.h>
43#include <linux/iommu.h>
44#include <linux/intel-iommu.h>
45#include <linux/cpufreq.h>
46#include <linux/user-return-notifier.h>
47#include <linux/srcu.h>
48#include <linux/slab.h>
49#include <linux/perf_event.h>
50#include <linux/uaccess.h>
51#include <linux/hash.h>
52#include <linux/pci.h>
53#include <linux/timekeeper_internal.h>
54#include <linux/pvclock_gtod.h>
55#include <linux/kvm_irqfd.h>
56#include <linux/irqbypass.h>
57#include <linux/sched/stat.h>
58#include <linux/sched/isolation.h>
59#include <linux/mem_encrypt.h>
60#include <linux/entry-kvm.h>
61#include <linux/suspend.h>
62
63#include <trace/events/kvm.h>
64
65#include <asm/debugreg.h>
66#include <asm/msr.h>
67#include <asm/desc.h>
68#include <asm/mce.h>
69#include <asm/pkru.h>
70#include <linux/kernel_stat.h>
71#include <asm/fpu/api.h>
72#include <asm/fpu/xcr.h>
73#include <asm/fpu/xstate.h>
74#include <asm/pvclock.h>
75#include <asm/div64.h>
76#include <asm/irq_remapping.h>
77#include <asm/mshyperv.h>
78#include <asm/hypervisor.h>
79#include <asm/tlbflush.h>
80#include <asm/intel_pt.h>
81#include <asm/emulate_prefix.h>
82#include <asm/sgx.h>
83#include <clocksource/hyperv_timer.h>
84
85#define CREATE_TRACE_POINTS
86#include "trace.h"
87
88#define MAX_IO_MSRS 256
89#define KVM_MAX_MCE_BANKS 32
90u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
91EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
92
93#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
94
95#define emul_to_vcpu(ctxt) \
96 ((struct kvm_vcpu *)(ctxt)->vcpu)
97
98
99
100
101
102#ifdef CONFIG_X86_64
103static
104u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
105#else
106static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
107#endif
108
109static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
110
111#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
112
113#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
114 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
115
116static void update_cr8_intercept(struct kvm_vcpu *vcpu);
117static void process_nmi(struct kvm_vcpu *vcpu);
118static void process_smi(struct kvm_vcpu *vcpu);
119static void enter_smm(struct kvm_vcpu *vcpu);
120static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
121static void store_regs(struct kvm_vcpu *vcpu);
122static int sync_regs(struct kvm_vcpu *vcpu);
123static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu);
124
125static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
126static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
127
128struct kvm_x86_ops kvm_x86_ops __read_mostly;
129EXPORT_SYMBOL_GPL(kvm_x86_ops);
130
131#define KVM_X86_OP(func) \
132 DEFINE_STATIC_CALL_NULL(kvm_x86_##func, \
133 *(((struct kvm_x86_ops *)0)->func));
134#define KVM_X86_OP_NULL KVM_X86_OP
135#include <asm/kvm-x86-ops.h>
136EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
137EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
138EXPORT_STATIC_CALL_GPL(kvm_x86_tlb_flush_current);
139
140static bool __read_mostly ignore_msrs = 0;
141module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
142
143bool __read_mostly report_ignored_msrs = true;
144module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
145EXPORT_SYMBOL_GPL(report_ignored_msrs);
146
147unsigned int min_timer_period_us = 200;
148module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
149
150static bool __read_mostly kvmclock_periodic_sync = true;
151module_param(kvmclock_periodic_sync, bool, S_IRUGO);
152
153bool __read_mostly kvm_has_tsc_control;
154EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
155u32 __read_mostly kvm_max_guest_tsc_khz;
156EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
157u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
158EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
159u64 __read_mostly kvm_max_tsc_scaling_ratio;
160EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
161u64 __read_mostly kvm_default_tsc_scaling_ratio;
162EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
163bool __read_mostly kvm_has_bus_lock_exit;
164EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);
165
166
167static u32 __read_mostly tsc_tolerance_ppm = 250;
168module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
169
170
171
172
173
174
175
176static int __read_mostly lapic_timer_advance_ns = -1;
177module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
178
179static bool __read_mostly vector_hashing = true;
180module_param(vector_hashing, bool, S_IRUGO);
181
182bool __read_mostly enable_vmware_backdoor = false;
183module_param(enable_vmware_backdoor, bool, S_IRUGO);
184EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
185
186static bool __read_mostly force_emulation_prefix = false;
187module_param(force_emulation_prefix, bool, S_IRUGO);
188
189int __read_mostly pi_inject_timer = -1;
190module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
191
192
193bool __read_mostly enable_pmu = true;
194EXPORT_SYMBOL_GPL(enable_pmu);
195module_param(enable_pmu, bool, 0444);
196
197
198
199
200
201
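/*
 * Tracking for MSRs whose host values only need to be restored when the
 * CPU returns to userspace, via the user-return notifier, instead of on
 * every VM-exit.  "host" is the value to restore, "curr" is the value
 * currently loaded in hardware.
 */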
202#define KVM_MAX_NR_USER_RETURN_MSRS 16
203
204struct kvm_user_return_msrs {
205 struct user_return_notifier urn;
206 bool registered;
207 struct kvm_user_return_msr_values {
208 u64 host;
209 u64 curr;
210 } values[KVM_MAX_NR_USER_RETURN_MSRS];
211};
212
213u32 __read_mostly kvm_nr_uret_msrs;
214EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
215static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
216static struct kvm_user_return_msrs __percpu *user_return_msrs;
217
218#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
219 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
220 | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
221 | XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
222
223u64 __read_mostly host_efer;
224EXPORT_SYMBOL_GPL(host_efer);
225
226bool __read_mostly allow_smaller_maxphyaddr = 0;
227EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
228
229bool __read_mostly enable_apicv = true;
230EXPORT_SYMBOL_GPL(enable_apicv);
231
232u64 __read_mostly host_xss;
233EXPORT_SYMBOL_GPL(host_xss);
234u64 __read_mostly supported_xss;
235EXPORT_SYMBOL_GPL(supported_xss);
236
237const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
238 KVM_GENERIC_VM_STATS(),
239 STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
240 STATS_DESC_COUNTER(VM, mmu_pte_write),
241 STATS_DESC_COUNTER(VM, mmu_pde_zapped),
242 STATS_DESC_COUNTER(VM, mmu_flooded),
243 STATS_DESC_COUNTER(VM, mmu_recycled),
244 STATS_DESC_COUNTER(VM, mmu_cache_miss),
245 STATS_DESC_ICOUNTER(VM, mmu_unsync),
246 STATS_DESC_ICOUNTER(VM, pages_4k),
247 STATS_DESC_ICOUNTER(VM, pages_2m),
248 STATS_DESC_ICOUNTER(VM, pages_1g),
249 STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
250 STATS_DESC_PCOUNTER(VM, max_mmu_rmap_size),
251 STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
252};
253
254const struct kvm_stats_header kvm_vm_stats_header = {
255 .name_size = KVM_STATS_NAME_SIZE,
256 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
257 .id_offset = sizeof(struct kvm_stats_header),
258 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
259 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
260 sizeof(kvm_vm_stats_desc),
261};
262
263const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
264 KVM_GENERIC_VCPU_STATS(),
265 STATS_DESC_COUNTER(VCPU, pf_fixed),
266 STATS_DESC_COUNTER(VCPU, pf_guest),
267 STATS_DESC_COUNTER(VCPU, tlb_flush),
268 STATS_DESC_COUNTER(VCPU, invlpg),
269 STATS_DESC_COUNTER(VCPU, exits),
270 STATS_DESC_COUNTER(VCPU, io_exits),
271 STATS_DESC_COUNTER(VCPU, mmio_exits),
272 STATS_DESC_COUNTER(VCPU, signal_exits),
273 STATS_DESC_COUNTER(VCPU, irq_window_exits),
274 STATS_DESC_COUNTER(VCPU, nmi_window_exits),
275 STATS_DESC_COUNTER(VCPU, l1d_flush),
276 STATS_DESC_COUNTER(VCPU, halt_exits),
277 STATS_DESC_COUNTER(VCPU, request_irq_exits),
278 STATS_DESC_COUNTER(VCPU, irq_exits),
279 STATS_DESC_COUNTER(VCPU, host_state_reload),
280 STATS_DESC_COUNTER(VCPU, fpu_reload),
281 STATS_DESC_COUNTER(VCPU, insn_emulation),
282 STATS_DESC_COUNTER(VCPU, insn_emulation_fail),
283 STATS_DESC_COUNTER(VCPU, hypercalls),
284 STATS_DESC_COUNTER(VCPU, irq_injections),
285 STATS_DESC_COUNTER(VCPU, nmi_injections),
286 STATS_DESC_COUNTER(VCPU, req_event),
287 STATS_DESC_COUNTER(VCPU, nested_run),
288 STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
289 STATS_DESC_COUNTER(VCPU, directed_yield_successful),
290 STATS_DESC_ICOUNTER(VCPU, guest_mode)
291};
292
293const struct kvm_stats_header kvm_vcpu_stats_header = {
294 .name_size = KVM_STATS_NAME_SIZE,
295 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
296 .id_offset = sizeof(struct kvm_stats_header),
297 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
298 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
299 sizeof(kvm_vcpu_stats_desc),
300};
301
302u64 __read_mostly host_xcr0;
303u64 __read_mostly supported_xcr0;
304EXPORT_SYMBOL_GPL(supported_xcr0);
305
306static struct kmem_cache *x86_emulator_cache;
307
308
309
310
311
312static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
313{
314 const char *op = write ? "wrmsr" : "rdmsr";
315
316 if (ignore_msrs) {
317 if (report_ignored_msrs)
318 kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
319 op, msr, data);
320
321 return true;
322 } else {
323 kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
324 op, msr, data);
325 return false;
326 }
327}
328
329static struct kmem_cache *kvm_alloc_emulator_cache(void)
330{
331 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
332 unsigned int size = sizeof(struct x86_emulate_ctxt);
333
334 return kmem_cache_create_usercopy("x86_emulator", size,
335 __alignof__(struct x86_emulate_ctxt),
336 SLAB_ACCOUNT, useroffset,
337 size - useroffset, NULL);
338}
339
340static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
341
342static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
343{
344 int i;
345 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
346 vcpu->arch.apf.gfns[i] = ~0;
347}
348
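/*
 * User-return notifier callback: restore the host value of every
 * registered user-return MSR that still holds a different (guest) value.
 * Interrupts are disabled while the notifier is unregistered so that
 * ->registered stays consistent.
 */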
349static void kvm_on_user_return(struct user_return_notifier *urn)
350{
351 unsigned slot;
352 struct kvm_user_return_msrs *msrs
353 = container_of(urn, struct kvm_user_return_msrs, urn);
354 struct kvm_user_return_msr_values *values;
355 unsigned long flags;
356
357
358
359
360
361 local_irq_save(flags);
362 if (msrs->registered) {
363 msrs->registered = false;
364 user_return_notifier_unregister(urn);
365 }
366 local_irq_restore(flags);
367 for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
368 values = &msrs->values[slot];
369 if (values->host != values->curr) {
370 wrmsrl(kvm_uret_msrs_list[slot], values->host);
371 values->curr = values->host;
372 }
373 }
374}
375
376static int kvm_probe_user_return_msr(u32 msr)
377{
378 u64 val;
379 int ret;
380
381 preempt_disable();
382 ret = rdmsrl_safe(msr, &val);
383 if (ret)
384 goto out;
385 ret = wrmsrl_safe(msr, val);
386out:
387 preempt_enable();
388 return ret;
389}
390
391int kvm_add_user_return_msr(u32 msr)
392{
393 BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
394
395 if (kvm_probe_user_return_msr(msr))
396 return -1;
397
398 kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
399 return kvm_nr_uret_msrs++;
400}
401EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
402
403int kvm_find_user_return_msr(u32 msr)
404{
405 int i;
406
407 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
408 if (kvm_uret_msrs_list[i] == msr)
409 return i;
410 }
411 return -1;
412}
413EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
414
415static void kvm_user_return_msr_cpu_online(void)
416{
417 unsigned int cpu = smp_processor_id();
418 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
419 u64 value;
420 int i;
421
422 for (i = 0; i < kvm_nr_uret_msrs; ++i) {
423 rdmsrl_safe(kvm_uret_msrs_list[i], &value);
424 msrs->values[i].host = value;
425 msrs->values[i].curr = value;
426 }
427}
428
429int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
430{
431 unsigned int cpu = smp_processor_id();
432 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
433 int err;
434
435 value = (value & mask) | (msrs->values[slot].host & ~mask);
436 if (value == msrs->values[slot].curr)
437 return 0;
438 err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
439 if (err)
440 return 1;
441
442 msrs->values[slot].curr = value;
443 if (!msrs->registered) {
444 msrs->urn.on_user_return = kvm_on_user_return;
445 user_return_notifier_register(&msrs->urn);
446 msrs->registered = true;
447 }
448 return 0;
449}
450EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
451
452static void drop_user_return_notifiers(void)
453{
454 unsigned int cpu = smp_processor_id();
455 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
456
457 if (msrs->registered)
458 kvm_on_user_return(&msrs->urn);
459}
460
461u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
462{
463 return vcpu->arch.apic_base;
464}
465EXPORT_SYMBOL_GPL(kvm_get_apic_base);
466
467enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
468{
469 return kvm_apic_mode(kvm_get_apic_base(vcpu));
470}
471EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
472
473int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
474{
475 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
476 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
477 u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
478 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
479
480 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
481 return 1;
482 if (!msr_info->host_initiated) {
483 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
484 return 1;
485 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
486 return 1;
487 }
488
489 kvm_lapic_set_base(vcpu, msr_info->data);
490 kvm_recalculate_apic_map(vcpu->kvm);
491 return 0;
492}
493EXPORT_SYMBOL_GPL(kvm_set_apic_base);
494
495
496
497
498
499
500
501
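/*
 * Handler for spurious faults on hardware virtualization instructions.
 * Such a fault is only expected while kvm_rebooting is set (virtualization
 * is being torn down for reboot); anything else is a fatal bug.
 */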
502noinstr void kvm_spurious_fault(void)
503{
504
505 BUG_ON(!kvm_rebooting);
506}
507EXPORT_SYMBOL_GPL(kvm_spurious_fault);
508
509#define EXCPT_BENIGN 0
510#define EXCPT_CONTRIBUTORY 1
511#define EXCPT_PF 2
512
513static int exception_class(int vector)
514{
515 switch (vector) {
516 case PF_VECTOR:
517 return EXCPT_PF;
518 case DE_VECTOR:
519 case TS_VECTOR:
520 case NP_VECTOR:
521 case SS_VECTOR:
522 case GP_VECTOR:
523 return EXCPT_CONTRIBUTORY;
524 default:
525 break;
526 }
527 return EXCPT_BENIGN;
528}
529
530#define EXCPT_FAULT 0
531#define EXCPT_TRAP 1
532#define EXCPT_ABORT 2
533#define EXCPT_INTERRUPT 3
534
535static int exception_type(int vector)
536{
537 unsigned int mask;
538
539 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
540 return EXCPT_INTERRUPT;
541
542 mask = 1 << vector;
543
544
545 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
546 return EXCPT_TRAP;
547
548 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
549 return EXCPT_ABORT;
550
551
552 return EXCPT_FAULT;
553}
554
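/*
 * Deliver the payload of a pending exception into architectural state:
 * merge a #DB payload into DR6, or store a #PF payload into CR2, then
 * clear the payload.
 */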
555void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
556{
557 unsigned nr = vcpu->arch.exception.nr;
558 bool has_payload = vcpu->arch.exception.has_payload;
559 unsigned long payload = vcpu->arch.exception.payload;
560
561 if (!has_payload)
562 return;
563
564 switch (nr) {
565 case DB_VECTOR:
566
567
568
569
570
571 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
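 /*
  * The payload uses "active high" polarity for the DR6 status bits:
  * merge it by setting the bits reported in the payload and clearing
  * any DR6_ACTIVE_LOW bits the payload reports as set.  Bit 12 is not
  * a valid DR6 bit and is cleared below.
  */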
588 vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
589 vcpu->arch.dr6 |= payload;
590 vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
591
592
593
594
595
596
597
598 vcpu->arch.dr6 &= ~BIT(12);
599 break;
600 case PF_VECTOR:
601 vcpu->arch.cr2 = payload;
602 break;
603 }
604
605 vcpu->arch.exception.has_payload = false;
606 vcpu->arch.exception.payload = 0;
607}
608EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
609
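/*
 * Queue or re-inject an exception.  If no exception is pending or injected,
 * the new one is simply recorded (and its payload delivered unless the vCPU
 * is in guest mode).  Otherwise, contributory/page-fault combinations
 * escalate to #DF, and a fault while delivering #DF triggers a triple-fault
 * (shutdown) request.
 */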
610static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
611 unsigned nr, bool has_error, u32 error_code,
612 bool has_payload, unsigned long payload, bool reinject)
613{
614 u32 prev_nr;
615 int class1, class2;
616
617 kvm_make_request(KVM_REQ_EVENT, vcpu);
618
619 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
620 queue:
621 if (reinject) {
622
623
624
625
626
627
628
629
630 WARN_ON_ONCE(vcpu->arch.exception.pending);
631 vcpu->arch.exception.injected = true;
632 if (WARN_ON_ONCE(has_payload)) {
633
634
635
636
637 has_payload = false;
638 payload = 0;
639 }
640 } else {
641 vcpu->arch.exception.pending = true;
642 vcpu->arch.exception.injected = false;
643 }
644 vcpu->arch.exception.has_error_code = has_error;
645 vcpu->arch.exception.nr = nr;
646 vcpu->arch.exception.error_code = error_code;
647 vcpu->arch.exception.has_payload = has_payload;
648 vcpu->arch.exception.payload = payload;
649 if (!is_guest_mode(vcpu))
650 kvm_deliver_exception_payload(vcpu);
651 return;
652 }
653
654
655 prev_nr = vcpu->arch.exception.nr;
656 if (prev_nr == DF_VECTOR) {
657
658 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
659 return;
660 }
661 class1 = exception_class(prev_nr);
662 class2 = exception_class(nr);
663 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
664 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
665
666
667
668
669
670 vcpu->arch.exception.pending = true;
671 vcpu->arch.exception.injected = false;
672 vcpu->arch.exception.has_error_code = true;
673 vcpu->arch.exception.nr = DF_VECTOR;
674 vcpu->arch.exception.error_code = 0;
675 vcpu->arch.exception.has_payload = false;
676 vcpu->arch.exception.payload = 0;
677 } else
678
679
680
681 goto queue;
682}
683
684void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
685{
686 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
687}
688EXPORT_SYMBOL_GPL(kvm_queue_exception);
689
690void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
691{
692 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
693}
694EXPORT_SYMBOL_GPL(kvm_requeue_exception);
695
696void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
697 unsigned long payload)
698{
699 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
700}
701EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
702
703static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
704 u32 error_code, unsigned long payload)
705{
706 kvm_multiple_exception(vcpu, nr, true, error_code,
707 true, payload, false);
708}
709
710int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
711{
712 if (err)
713 kvm_inject_gp(vcpu, 0);
714 else
715 return kvm_skip_emulated_instruction(vcpu);
716
717 return 1;
718}
719EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
720
721static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
722{
723 if (err) {
724 kvm_inject_gp(vcpu, 0);
725 return 1;
726 }
727
728 return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE | EMULTYPE_SKIP |
729 EMULTYPE_COMPLETE_USER_EXIT);
730}
731
732void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
733{
734 ++vcpu->stat.pf_guest;
735 vcpu->arch.exception.nested_apf =
736 is_guest_mode(vcpu) && fault->async_page_fault;
737 if (vcpu->arch.exception.nested_apf) {
738 vcpu->arch.apf.nested_apf_token = fault->address;
739 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
740 } else {
741 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
742 fault->address);
743 }
744}
745EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
746
747bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
748 struct x86_exception *fault)
749{
750 struct kvm_mmu *fault_mmu;
751 WARN_ON_ONCE(fault->vector != PF_VECTOR);
752
753 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
754 vcpu->arch.walk_mmu;
755
756
757
758
759
760 if ((fault->error_code & PFERR_PRESENT_MASK) &&
761 !(fault->error_code & PFERR_RSVD_MASK))
762 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
763 fault_mmu->root_hpa);
764
765 fault_mmu->inject_page_fault(vcpu, fault);
766 return fault->nested_page_fault;
767}
768EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
769
770void kvm_inject_nmi(struct kvm_vcpu *vcpu)
771{
772 atomic_inc(&vcpu->arch.nmi_queued);
773 kvm_make_request(KVM_REQ_NMI, vcpu);
774}
775EXPORT_SYMBOL_GPL(kvm_inject_nmi);
776
777void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
778{
779 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
780}
781EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
782
783void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
784{
785 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
786}
787EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
788
789
790
791
792
793bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
794{
795 if (static_call(kvm_x86_get_cpl)(vcpu) <= required_cpl)
796 return true;
797 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
798 return false;
799}
800EXPORT_SYMBOL_GPL(kvm_require_cpl);
801
802bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
803{
804 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
805 return true;
806
807 kvm_queue_exception(vcpu, UD_VECTOR);
808 return false;
809}
810EXPORT_SYMBOL_GPL(kvm_require_dr);
811
812static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
813{
814 return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
815}
816
817
818
819
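/*
 * Load the four PAE page-directory-pointer-table entries referenced by CR3.
 * Returns 1 on success, 0 if the PDPT is unreachable or any present entry
 * has reserved bits set.
 */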
820int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
821{
822 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
823 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
824 gpa_t real_gpa;
825 int i;
826 int ret;
827 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
828
829
830
831
832
833 real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
834 PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
835 if (real_gpa == UNMAPPED_GVA)
836 return 0;
837
838
839 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(real_gpa), pdpte,
840 cr3 & GENMASK(11, 5), sizeof(pdpte));
841 if (ret < 0)
842 return 0;
843
844 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
845 if ((pdpte[i] & PT_PRESENT_MASK) &&
846 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
847 return 0;
848 }
849 }
850
851
852
853
854
855 if (!tdp_enabled && memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)))
856 kvm_mmu_free_roots(vcpu, mmu, KVM_MMU_ROOT_CURRENT);
857
858 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
859 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
860 kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
861 vcpu->arch.pdptrs_from_userspace = false;
862
863 return 1;
864}
865EXPORT_SYMBOL_GPL(load_pdptrs);
866
867void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
868{
869 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
870 kvm_clear_async_pf_completion_queue(vcpu);
871 kvm_async_pf_hash_reset(vcpu);
872 }
873
874 if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
875 kvm_mmu_reset_context(vcpu);
876
877 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
878 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
879 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
880 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
881}
882EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
883
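/*
 * Emulate a CR0 write: reject reserved or inconsistent combinations (NW
 * without CD, PG without PE, enabling paging under EFER.LME without PAE or
 * with a 64-bit code segment, disabling paging while in 64-bit mode or with
 * CR4.PCIDE set), reload the PAE PDPTEs when needed, then hand the value to
 * the vendor code.
 */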
884int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
885{
886 unsigned long old_cr0 = kvm_read_cr0(vcpu);
887
888 cr0 |= X86_CR0_ET;
889
890#ifdef CONFIG_X86_64
891 if (cr0 & 0xffffffff00000000UL)
892 return 1;
893#endif
894
895 cr0 &= ~CR0_RESERVED_BITS;
896
897 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
898 return 1;
899
900 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
901 return 1;
902
903#ifdef CONFIG_X86_64
904 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
905 (cr0 & X86_CR0_PG)) {
906 int cs_db, cs_l;
907
908 if (!is_pae(vcpu))
909 return 1;
910 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
911 if (cs_l)
912 return 1;
913 }
914#endif
915 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
916 is_pae(vcpu) && ((cr0 ^ old_cr0) & X86_CR0_PDPTR_BITS) &&
917 !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
918 return 1;
919
920 if (!(cr0 & X86_CR0_PG) &&
921 (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
922 return 1;
923
924 static_call(kvm_x86_set_cr0)(vcpu, cr0);
925
926 kvm_post_set_cr0(vcpu, old_cr0, cr0);
927
928 return 0;
929}
930EXPORT_SYMBOL_GPL(kvm_set_cr0);
931
932void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
933{
934 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
935}
936EXPORT_SYMBOL_GPL(kvm_lmsw);
937
938void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
939{
940 if (vcpu->arch.guest_state_protected)
941 return;
942
943 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
944
945 if (vcpu->arch.xcr0 != host_xcr0)
946 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
947
948 if (vcpu->arch.xsaves_enabled &&
949 vcpu->arch.ia32_xss != host_xss)
950 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
951 }
952
953 if (static_cpu_has(X86_FEATURE_PKU) &&
954 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
955 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
956 vcpu->arch.pkru != vcpu->arch.host_pkru)
957 write_pkru(vcpu->arch.pkru);
958}
959EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
960
961void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
962{
963 if (vcpu->arch.guest_state_protected)
964 return;
965
966 if (static_cpu_has(X86_FEATURE_PKU) &&
967 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
968 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
969 vcpu->arch.pkru = rdpkru();
970 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
971 write_pkru(vcpu->arch.host_pkru);
972 }
973
974 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
975
976 if (vcpu->arch.xcr0 != host_xcr0)
977 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
978
979 if (vcpu->arch.xsaves_enabled &&
980 vcpu->arch.ia32_xss != host_xss)
981 wrmsrl(MSR_IA32_XSS, host_xss);
982 }
983
984}
985EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
986
987static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu)
988{
989 return vcpu->arch.guest_fpu.fpstate->user_xfeatures;
990}
991
992#ifdef CONFIG_X86_64
993static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
994{
995 return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC;
996}
997#endif
998
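/*
 * Validate and set XCR0: FP must always be set, YMM requires SSE, the
 * AVX-512 and XTILE feature groups must be enabled as a whole, BNDREGS and
 * BNDCSR must match, and only features exposed to the guest are allowed.
 */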
999static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
1000{
1001 u64 xcr0 = xcr;
1002 u64 old_xcr0 = vcpu->arch.xcr0;
1003 u64 valid_bits;
1004
1005
1006 if (index != XCR_XFEATURE_ENABLED_MASK)
1007 return 1;
1008 if (!(xcr0 & XFEATURE_MASK_FP))
1009 return 1;
1010 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
1011 return 1;
1012
1013
1014
1015
1016
1017
1018 valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP;
1019 if (xcr0 & ~valid_bits)
1020 return 1;
1021
1022 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
1023 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
1024 return 1;
1025
1026 if (xcr0 & XFEATURE_MASK_AVX512) {
1027 if (!(xcr0 & XFEATURE_MASK_YMM))
1028 return 1;
1029 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
1030 return 1;
1031 }
1032
1033 if ((xcr0 & XFEATURE_MASK_XTILE) &&
1034 ((xcr0 & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE))
1035 return 1;
1036
1037 vcpu->arch.xcr0 = xcr0;
1038
1039 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
1040 kvm_update_cpuid_runtime(vcpu);
1041 return 0;
1042}
1043
1044int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
1045{
1046 if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
1047 __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
1048 kvm_inject_gp(vcpu, 0);
1049 return 1;
1050 }
1051
1052 return kvm_skip_emulated_instruction(vcpu);
1053}
1054EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
1055
1056bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1057{
1058 if (cr4 & cr4_reserved_bits)
1059 return false;
1060
1061 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
1062 return false;
1063
1064 return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
1065}
1066EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
1067
1068void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
1069{
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086 if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS)
1087 kvm_mmu_reset_context(vcpu);
1088 else if ((cr4 ^ old_cr4) & X86_CR4_PCIDE)
1089 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
1090 else if ((cr4 ^ old_cr4) & X86_CR4_PGE)
1091 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1092}
1093EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
1094
1095int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1096{
1097 unsigned long old_cr4 = kvm_read_cr4(vcpu);
1098
1099 if (!kvm_is_valid_cr4(vcpu, cr4))
1100 return 1;
1101
1102 if (is_long_mode(vcpu)) {
1103 if (!(cr4 & X86_CR4_PAE))
1104 return 1;
1105 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
1106 return 1;
1107 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
1108 && ((cr4 ^ old_cr4) & X86_CR4_PDPTR_BITS)
1109 && !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
1110 return 1;
1111
1112 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
1113 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
1114 return 1;
1115
1116
1117 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1118 return 1;
1119 }
1120
1121 static_call(kvm_x86_set_cr4)(vcpu, cr4);
1122
1123 kvm_post_set_cr4(vcpu, old_cr4, cr4);
1124
1125 return 0;
1126}
1127EXPORT_SYMBOL_GPL(kvm_set_cr4);
1128
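/*
 * Invalidate mappings for the given PCID: with TDP enabled a guest TLB
 * flush request is sufficient; otherwise sync/flush the current root if it
 * uses this PCID and free any cached previous roots that do.
 */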
1129static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
1130{
1131 struct kvm_mmu *mmu = vcpu->arch.mmu;
1132 unsigned long roots_to_free = 0;
1133 int i;
1134
1135
1136
1137
1138
1139
1140
1141
1142 if (unlikely(tdp_enabled)) {
1143 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
1144 return;
1145 }
1146
1147
1148
1149
1150
1151
1152 if (kvm_get_active_pcid(vcpu) == pcid) {
1153 kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
1154 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1155 }
1156
1157
1158
1159
1160
1161
1162 if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
1163 return;
1164
1165 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
1166 if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
1167 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
1168
1169 kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
1170}
1171
1172int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1173{
1174 bool skip_tlb_flush = false;
1175 unsigned long pcid = 0;
1176#ifdef CONFIG_X86_64
1177 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1178
1179 if (pcid_enabled) {
1180 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1181 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1182 pcid = cr3 & X86_CR3_PCID_MASK;
1183 }
1184#endif
1185
1186
1187 if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
1188 goto handle_tlb_flush;
1189
1190
1191
1192
1193
1194
1195 if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
1196 return 1;
1197
1198 if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3))
1199 return 1;
1200
1201 if (cr3 != kvm_read_cr3(vcpu))
1202 kvm_mmu_new_pgd(vcpu, cr3);
1203
1204 vcpu->arch.cr3 = cr3;
1205 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
1206
1207
1208handle_tlb_flush:
1209
1210
1211
1212
1213
1214
1215
1216 if (!skip_tlb_flush)
1217 kvm_invalidate_pcid(vcpu, pcid);
1218
1219 return 0;
1220}
1221EXPORT_SYMBOL_GPL(kvm_set_cr3);
1222
1223int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1224{
1225 if (cr8 & CR8_RESERVED_BITS)
1226 return 1;
1227 if (lapic_in_kernel(vcpu))
1228 kvm_lapic_set_tpr(vcpu, cr8);
1229 else
1230 vcpu->arch.cr8 = cr8;
1231 return 0;
1232}
1233EXPORT_SYMBOL_GPL(kvm_set_cr8);
1234
1235unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1236{
1237 if (lapic_in_kernel(vcpu))
1238 return kvm_lapic_get_cr8(vcpu);
1239 else
1240 return vcpu->arch.cr8;
1241}
1242EXPORT_SYMBOL_GPL(kvm_get_cr8);
1243
1244static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1245{
1246 int i;
1247
1248 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1249 for (i = 0; i < KVM_NR_DB_REGS; i++)
1250 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1251 }
1252}
1253
1254void kvm_update_dr7(struct kvm_vcpu *vcpu)
1255{
1256 unsigned long dr7;
1257
1258 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1259 dr7 = vcpu->arch.guest_debug_dr7;
1260 else
1261 dr7 = vcpu->arch.dr7;
1262 static_call(kvm_x86_set_dr7)(vcpu, dr7);
1263 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1264 if (dr7 & DR7_BP_EN_MASK)
1265 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1266}
1267EXPORT_SYMBOL_GPL(kvm_update_dr7);
1268
1269static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1270{
1271 u64 fixed = DR6_FIXED_1;
1272
1273 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1274 fixed |= DR6_RTM;
1275
1276 if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
1277 fixed |= DR6_BUS_LOCK;
1278 return fixed;
1279}
1280
1281int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1282{
1283 size_t size = ARRAY_SIZE(vcpu->arch.db);
1284
1285 switch (dr) {
1286 case 0 ... 3:
1287 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1288 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1289 vcpu->arch.eff_db[dr] = val;
1290 break;
1291 case 4:
1292 case 6:
1293 if (!kvm_dr6_valid(val))
1294 return 1;
1295 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1296 break;
1297 case 5:
1298 default:
1299 if (!kvm_dr7_valid(val))
1300 return 1;
1301 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1302 kvm_update_dr7(vcpu);
1303 break;
1304 }
1305
1306 return 0;
1307}
1308EXPORT_SYMBOL_GPL(kvm_set_dr);
1309
1310void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1311{
1312 size_t size = ARRAY_SIZE(vcpu->arch.db);
1313
1314 switch (dr) {
1315 case 0 ... 3:
1316 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1317 break;
1318 case 4:
1319 case 6:
1320 *val = vcpu->arch.dr6;
1321 break;
1322 case 5:
1323 default:
1324 *val = vcpu->arch.dr7;
1325 break;
1326 }
1327}
1328EXPORT_SYMBOL_GPL(kvm_get_dr);
1329
1330int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
1331{
1332 u32 ecx = kvm_rcx_read(vcpu);
1333 u64 data;
1334
1335 if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
1336 kvm_inject_gp(vcpu, 0);
1337 return 1;
1338 }
1339
1340 kvm_rax_write(vcpu, (u32)data);
1341 kvm_rdx_write(vcpu, data >> 32);
1342 return kvm_skip_emulated_instruction(vcpu);
1343}
1344EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
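/*
 * MSRs that userspace is expected to save and restore (e.g. for live
 * migration).  The list is pruned into msrs_to_save[] based on what the
 * host actually supports.
 */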
1358static const u32 msrs_to_save_all[] = {
1359 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1360 MSR_STAR,
1361#ifdef CONFIG_X86_64
1362 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1363#endif
1364 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1365 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1366 MSR_IA32_SPEC_CTRL,
1367 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1368 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1369 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1370 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1371 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1372 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1373 MSR_IA32_UMWAIT_CONTROL,
1374
1375 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1376 MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
1377 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1378 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1379 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1380 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1381 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1382 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1383 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1384 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1385 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1386 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1387 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1388 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1389 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1390 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1391 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1392 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1393 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1394 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1395 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1396 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1397
1398 MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
1399 MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
1400 MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2,
1401 MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
1402 MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
1403 MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
1404 MSR_IA32_XFD, MSR_IA32_XFD_ERR,
1405};
1406
1407static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1408static unsigned num_msrs_to_save;
1409
1410static const u32 emulated_msrs_all[] = {
1411 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1412 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1413 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1414 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1415 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1416 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1417 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1418 HV_X64_MSR_RESET,
1419 HV_X64_MSR_VP_INDEX,
1420 HV_X64_MSR_VP_RUNTIME,
1421 HV_X64_MSR_SCONTROL,
1422 HV_X64_MSR_STIMER0_CONFIG,
1423 HV_X64_MSR_VP_ASSIST_PAGE,
1424 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1425 HV_X64_MSR_TSC_EMULATION_STATUS,
1426 HV_X64_MSR_SYNDBG_OPTIONS,
1427 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1428 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1429 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1430
1431 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1432 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1433
1434 MSR_IA32_TSC_ADJUST,
1435 MSR_IA32_TSC_DEADLINE,
1436 MSR_IA32_ARCH_CAPABILITIES,
1437 MSR_IA32_PERF_CAPABILITIES,
1438 MSR_IA32_MISC_ENABLE,
1439 MSR_IA32_MCG_STATUS,
1440 MSR_IA32_MCG_CTL,
1441 MSR_IA32_MCG_EXT_CTL,
1442 MSR_IA32_SMBASE,
1443 MSR_SMI_COUNT,
1444 MSR_PLATFORM_INFO,
1445 MSR_MISC_FEATURES_ENABLES,
1446 MSR_AMD64_VIRT_SPEC_CTRL,
1447 MSR_AMD64_TSC_RATIO,
1448 MSR_IA32_POWER_CTL,
1449 MSR_IA32_UCODE_REV,
1450
1451
1452
1453
1454
1455
1456
1457
1458 MSR_IA32_VMX_BASIC,
1459 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1460 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1461 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1462 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1463 MSR_IA32_VMX_MISC,
1464 MSR_IA32_VMX_CR0_FIXED0,
1465 MSR_IA32_VMX_CR4_FIXED0,
1466 MSR_IA32_VMX_VMCS_ENUM,
1467 MSR_IA32_VMX_PROCBASED_CTLS2,
1468 MSR_IA32_VMX_EPT_VPID_CAP,
1469 MSR_IA32_VMX_VMFUNC,
1470
1471 MSR_K7_HWCR,
1472 MSR_KVM_POLL_CONTROL,
1473};
1474
1475static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1476static unsigned num_emulated_msrs;
1477
1478
1479
1480
1481
1482static const u32 msr_based_features_all[] = {
1483 MSR_IA32_VMX_BASIC,
1484 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1485 MSR_IA32_VMX_PINBASED_CTLS,
1486 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1487 MSR_IA32_VMX_PROCBASED_CTLS,
1488 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1489 MSR_IA32_VMX_EXIT_CTLS,
1490 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1491 MSR_IA32_VMX_ENTRY_CTLS,
1492 MSR_IA32_VMX_MISC,
1493 MSR_IA32_VMX_CR0_FIXED0,
1494 MSR_IA32_VMX_CR0_FIXED1,
1495 MSR_IA32_VMX_CR4_FIXED0,
1496 MSR_IA32_VMX_CR4_FIXED1,
1497 MSR_IA32_VMX_VMCS_ENUM,
1498 MSR_IA32_VMX_PROCBASED_CTLS2,
1499 MSR_IA32_VMX_EPT_VPID_CAP,
1500 MSR_IA32_VMX_VMFUNC,
1501
1502 MSR_F10H_DECFG,
1503 MSR_IA32_UCODE_REV,
1504 MSR_IA32_ARCH_CAPABILITIES,
1505 MSR_IA32_PERF_CAPABILITIES,
1506};
1507
1508static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1509static unsigned int num_msr_based_features;
1510
1511static u64 kvm_get_arch_capabilities(void)
1512{
1513 u64 data = 0;
1514
1515 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1516 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1517
1518
1519
1520
1521
1522
1523
1524 data |= ARCH_CAP_PSCHANGE_MC_NO;
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1536 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1537
1538 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1539 data |= ARCH_CAP_RDCL_NO;
1540 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1541 data |= ARCH_CAP_SSB_NO;
1542 if (!boot_cpu_has_bug(X86_BUG_MDS))
1543 data |= ARCH_CAP_MDS_NO;
1544
1545 if (!boot_cpu_has(X86_FEATURE_RTM)) {
1546
1547
1548
1549
1550
1551
1552
1553 data &= ~ARCH_CAP_TAA_NO;
1554 } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
1555 data |= ARCH_CAP_TAA_NO;
1556 } else {
1557
1558
1559
1560
1561
1562 }
1563
1564 return data;
1565}
1566
1567static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1568{
1569 switch (msr->index) {
1570 case MSR_IA32_ARCH_CAPABILITIES:
1571 msr->data = kvm_get_arch_capabilities();
1572 break;
1573 case MSR_IA32_UCODE_REV:
1574 rdmsrl_safe(msr->index, &msr->data);
1575 break;
1576 default:
1577 return static_call(kvm_x86_get_msr_feature)(msr);
1578 }
1579 return 0;
1580}
1581
1582static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1583{
1584 struct kvm_msr_entry msr;
1585 int r;
1586
1587 msr.index = index;
1588 r = kvm_get_msr_feature(&msr);
1589
1590 if (r == KVM_MSR_RET_INVALID) {
1591
1592 *data = 0;
1593 if (kvm_msr_ignored_check(index, 0, false))
1594 r = 0;
1595 }
1596
1597 if (r)
1598 return r;
1599
1600 *data = msr.data;
1601
1602 return 0;
1603}
1604
1605static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1606{
1607 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1608 return false;
1609
1610 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1611 return false;
1612
1613 if (efer & (EFER_LME | EFER_LMA) &&
1614 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1615 return false;
1616
1617 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1618 return false;
1619
1620 return true;
1621
1622}
1623bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1624{
1625 if (efer & efer_reserved_bits)
1626 return false;
1627
1628 return __kvm_valid_efer(vcpu, efer);
1629}
1630EXPORT_SYMBOL_GPL(kvm_valid_efer);
1631
1632static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1633{
1634 u64 old_efer = vcpu->arch.efer;
1635 u64 efer = msr_info->data;
1636 int r;
1637
1638 if (efer & efer_reserved_bits)
1639 return 1;
1640
1641 if (!msr_info->host_initiated) {
1642 if (!__kvm_valid_efer(vcpu, efer))
1643 return 1;
1644
1645 if (is_paging(vcpu) &&
1646 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1647 return 1;
1648 }
1649
1650 efer &= ~EFER_LMA;
1651 efer |= vcpu->arch.efer & EFER_LMA;
1652
1653 r = static_call(kvm_x86_set_efer)(vcpu, efer);
1654 if (r) {
1655 WARN_ON(r > 0);
1656 return r;
1657 }
1658
1659
1660 if ((efer ^ old_efer) & EFER_NX)
1661 kvm_mmu_reset_context(vcpu);
1662
1663 return 0;
1664}
1665
1666void kvm_enable_efer_bits(u64 mask)
1667{
1668 efer_reserved_bits &= ~mask;
1669}
1670EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1671
1672bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
1673{
1674 struct kvm_x86_msr_filter *msr_filter;
1675 struct msr_bitmap_range *ranges;
1676 struct kvm *kvm = vcpu->kvm;
1677 bool allowed;
1678 int idx;
1679 u32 i;
1680
1681
1682 if (index >= 0x800 && index <= 0x8ff)
1683 return true;
1684
1685 idx = srcu_read_lock(&kvm->srcu);
1686
1687 msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
1688 if (!msr_filter) {
1689 allowed = true;
1690 goto out;
1691 }
1692
1693 allowed = msr_filter->default_allow;
1694 ranges = msr_filter->ranges;
1695
1696 for (i = 0; i < msr_filter->count; i++) {
1697 u32 start = ranges[i].base;
1698 u32 end = start + ranges[i].nmsrs;
1699 u32 flags = ranges[i].flags;
1700 unsigned long *bitmap = ranges[i].bitmap;
1701
1702 if ((index >= start) && (index < end) && (flags & type)) {
1703 allowed = !!test_bit(index - start, bitmap);
1704 break;
1705 }
1706 }
1707
1708out:
1709 srcu_read_unlock(&kvm->srcu, idx);
1710
1711 return allowed;
1712}
1713EXPORT_SYMBOL_GPL(kvm_msr_allowed);
1714
1715
1716
1717
1718
1719
1720
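/*
 * Write @data to the MSR specified by @index.  Non-host-initiated writes
 * are subject to the userspace MSR filter and to the canonicality/CPUID
 * checks for the MSRs special-cased below; everything else is forwarded to
 * the vendor implementation.
 */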
1721static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1722 bool host_initiated)
1723{
1724 struct msr_data msr;
1725
1726 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
1727 return KVM_MSR_RET_FILTERED;
1728
1729 switch (index) {
1730 case MSR_FS_BASE:
1731 case MSR_GS_BASE:
1732 case MSR_KERNEL_GS_BASE:
1733 case MSR_CSTAR:
1734 case MSR_LSTAR:
1735 if (is_noncanonical_address(data, vcpu))
1736 return 1;
1737 break;
1738 case MSR_IA32_SYSENTER_EIP:
1739 case MSR_IA32_SYSENTER_ESP:
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
1753 break;
1754 case MSR_TSC_AUX:
1755 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1756 return 1;
1757
1758 if (!host_initiated &&
1759 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1760 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1761 return 1;
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772 if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
1773 return 1;
1774
1775 data = (u32)data;
1776 break;
1777 }
1778
1779 msr.data = data;
1780 msr.index = index;
1781 msr.host_initiated = host_initiated;
1782
1783 return static_call(kvm_x86_set_msr)(vcpu, &msr);
1784}
1785
1786static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1787 u32 index, u64 data, bool host_initiated)
1788{
1789 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1790
1791 if (ret == KVM_MSR_RET_INVALID)
1792 if (kvm_msr_ignored_check(index, data, true))
1793 ret = 0;
1794
1795 return ret;
1796}
1797
1798
1799
1800
1801
1802
1803
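/*
 * Read the MSR specified by @index into @data.  Non-host-initiated reads
 * are subject to the userspace MSR filter and, for MSR_TSC_AUX, to the
 * guest's RDTSCP/RDPID CPUID bits.
 */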
1804int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1805 bool host_initiated)
1806{
1807 struct msr_data msr;
1808 int ret;
1809
1810 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
1811 return KVM_MSR_RET_FILTERED;
1812
1813 switch (index) {
1814 case MSR_TSC_AUX:
1815 if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
1816 return 1;
1817
1818 if (!host_initiated &&
1819 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
1820 !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
1821 return 1;
1822 break;
1823 }
1824
1825 msr.index = index;
1826 msr.host_initiated = host_initiated;
1827
1828 ret = static_call(kvm_x86_get_msr)(vcpu, &msr);
1829 if (!ret)
1830 *data = msr.data;
1831 return ret;
1832}
1833
1834static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1835 u32 index, u64 *data, bool host_initiated)
1836{
1837 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1838
1839 if (ret == KVM_MSR_RET_INVALID) {
1840
1841 *data = 0;
1842 if (kvm_msr_ignored_check(index, 0, false))
1843 ret = 0;
1844 }
1845
1846 return ret;
1847}
1848
1849int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1850{
1851 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1852}
1853EXPORT_SYMBOL_GPL(kvm_get_msr);
1854
1855int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1856{
1857 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1858}
1859EXPORT_SYMBOL_GPL(kvm_set_msr);
1860
1861static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
1862{
1863 if (!vcpu->run->msr.error) {
1864 kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
1865 kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
1866 }
1867}
1868
1869static int complete_emulated_msr_access(struct kvm_vcpu *vcpu)
1870{
1871 return complete_emulated_insn_gp(vcpu, vcpu->run->msr.error);
1872}
1873
1874static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
1875{
1876 complete_userspace_rdmsr(vcpu);
1877 return complete_emulated_msr_access(vcpu);
1878}
1879
1880static int complete_fast_msr_access(struct kvm_vcpu *vcpu)
1881{
1882 return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
1883}
1884
1885static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
1886{
1887 complete_userspace_rdmsr(vcpu);
1888 return complete_fast_msr_access(vcpu);
1889}
1890
1891static u64 kvm_msr_reason(int r)
1892{
1893 switch (r) {
1894 case KVM_MSR_RET_INVALID:
1895 return KVM_MSR_EXIT_REASON_UNKNOWN;
1896 case KVM_MSR_RET_FILTERED:
1897 return KVM_MSR_EXIT_REASON_FILTER;
1898 default:
1899 return KVM_MSR_EXIT_REASON_INVAL;
1900 }
1901}
1902
1903static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
1904 u32 exit_reason, u64 data,
1905 int (*completion)(struct kvm_vcpu *vcpu),
1906 int r)
1907{
1908 u64 msr_reason = kvm_msr_reason(r);
1909
1910
1911 if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
1912 return 0;
1913
1914 vcpu->run->exit_reason = exit_reason;
1915 vcpu->run->msr.error = 0;
1916 memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
1917 vcpu->run->msr.reason = msr_reason;
1918 vcpu->run->msr.index = index;
1919 vcpu->run->msr.data = data;
1920 vcpu->arch.complete_userspace_io = completion;
1921
1922 return 1;
1923}
1924
1925int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1926{
1927 u32 ecx = kvm_rcx_read(vcpu);
1928 u64 data;
1929 int r;
1930
1931 r = kvm_get_msr(vcpu, ecx, &data);
1932
1933 if (!r) {
1934 trace_kvm_msr_read(ecx, data);
1935
1936 kvm_rax_write(vcpu, data & -1u);
1937 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1938 } else {
1939
1940 if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
1941 complete_fast_rdmsr, r))
1942 return 0;
1943 trace_kvm_msr_read_ex(ecx);
1944 }
1945
1946 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1947}
1948EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1949
1950int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1951{
1952 u32 ecx = kvm_rcx_read(vcpu);
1953 u64 data = kvm_read_edx_eax(vcpu);
1954 int r;
1955
1956 r = kvm_set_msr(vcpu, ecx, data);
1957
1958 if (!r) {
1959 trace_kvm_msr_write(ecx, data);
1960 } else {
1961
1962 if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
1963 complete_fast_msr_access, r))
1964 return 0;
1965
1966 if (r < 0)
1967 return r;
1968 trace_kvm_msr_write_ex(ecx, data);
1969 }
1970
1971 return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
1972}
1973EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1974
1975int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
1976{
1977 return kvm_skip_emulated_instruction(vcpu);
1978}
1979EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
1980
1981int kvm_emulate_invd(struct kvm_vcpu *vcpu)
1982{
1983
1984 return kvm_emulate_as_nop(vcpu);
1985}
1986EXPORT_SYMBOL_GPL(kvm_emulate_invd);
1987
1988int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
1989{
1990 pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
1991 return kvm_emulate_as_nop(vcpu);
1992}
1993EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
1994
1995int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
1996{
1997 kvm_queue_exception(vcpu, UD_VECTOR);
1998 return 1;
1999}
2000EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
2001
2002int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
2003{
2004 pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
2005 return kvm_emulate_as_nop(vcpu);
2006}
2007EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
2008
2009static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
2010{
2011 xfer_to_guest_mode_prepare();
2012 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
2013 xfer_to_guest_mode_work_pending();
2014}
2015
2016
2017
2018
2019
2020
2021
2022
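/*
 * Fast path for x2APIC ICR writes: handle fixed-delivery, physical,
 * non-broadcast IPIs directly with interrupts disabled, avoiding the full
 * MSR emulation path.  Returns 0 if the write was handled here.
 */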
2023static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
2024{
2025 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
2026 return 1;
2027
2028 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
2029 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
2030 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
2031 ((u32)(data >> 32) != X2APIC_BROADCAST)) {
2032
2033 data &= ~(1 << 12);
2034 kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
2035 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
2036 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
2037 trace_kvm_apic_write(APIC_ICR, (u32)data);
2038 return 0;
2039 }
2040
2041 return 1;
2042}
2043
2044static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
2045{
2046 if (!kvm_can_use_hv_timer(vcpu))
2047 return 1;
2048
2049 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2050 return 0;
2051}
2052
2053fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
2054{
2055 u32 msr = kvm_rcx_read(vcpu);
2056 u64 data;
2057 fastpath_t ret = EXIT_FASTPATH_NONE;
2058
2059 switch (msr) {
2060 case APIC_BASE_MSR + (APIC_ICR >> 4):
2061 data = kvm_read_edx_eax(vcpu);
2062 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
2063 kvm_skip_emulated_instruction(vcpu);
2064 ret = EXIT_FASTPATH_EXIT_HANDLED;
2065 }
2066 break;
2067 case MSR_IA32_TSC_DEADLINE:
2068 data = kvm_read_edx_eax(vcpu);
2069 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
2070 kvm_skip_emulated_instruction(vcpu);
2071 ret = EXIT_FASTPATH_REENTER_GUEST;
2072 }
2073 break;
2074 default:
2075 break;
2076 }
2077
2078 if (ret != EXIT_FASTPATH_NONE)
2079 trace_kvm_msr_write(msr, data);
2080
2081 return ret;
2082}
2083EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
2084
2085
2086
2087
2088static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2089{
2090 return kvm_get_msr_ignored_check(vcpu, index, data, true);
2091}
2092
2093static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
2094{
2095 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
2096}
2097
2098#ifdef CONFIG_X86_64
2099struct pvclock_clock {
2100 int vclock_mode;
2101 u64 cycle_last;
2102 u64 mask;
2103 u32 mult;
2104 u32 shift;
2105 u64 base_cycles;
2106 u64 offset;
2107};
2108
2109struct pvclock_gtod_data {
2110 seqcount_t seq;
2111
2112 struct pvclock_clock clock;
2113 struct pvclock_clock raw_clock;
2114
2115 ktime_t offs_boot;
2116 u64 wall_time_sec;
2117};
2118
2119static struct pvclock_gtod_data pvclock_gtod_data;
2120
2121static void update_pvclock_gtod(struct timekeeper *tk)
2122{
2123 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
2124
2125 write_seqcount_begin(&vdata->seq);
2126
2127
2128 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
2129 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
2130 vdata->clock.mask = tk->tkr_mono.mask;
2131 vdata->clock.mult = tk->tkr_mono.mult;
2132 vdata->clock.shift = tk->tkr_mono.shift;
2133 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
2134 vdata->clock.offset = tk->tkr_mono.base;
2135
2136 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
2137 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
2138 vdata->raw_clock.mask = tk->tkr_raw.mask;
2139 vdata->raw_clock.mult = tk->tkr_raw.mult;
2140 vdata->raw_clock.shift = tk->tkr_raw.shift;
2141 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
2142 vdata->raw_clock.offset = tk->tkr_raw.base;
2143
2144 vdata->wall_time_sec = tk->xtime_sec;
2145
2146 vdata->offs_boot = tk->offs_boot;
2147
2148 write_seqcount_end(&vdata->seq);
2149}
2150
2151static s64 get_kvmclock_base_ns(void)
2152{
2153
2154 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
2155}
2156#else
2157static s64 get_kvmclock_base_ns(void)
2158{
2159
2160 return ktime_get_boottime_ns();
2161}
2162#endif
2163
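/*
 * Publish wall-clock time to the guest's pvclock wall clock structure.
 * The version field acts as a sequence counter: it is made odd while the
 * update is in flight and bumped back to an even value when done.
 */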
2164static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
2165{
2166 int version;
2167 int r;
2168 struct pvclock_wall_clock wc;
2169 u32 wc_sec_hi;
2170 u64 wall_nsec;
2171
2172 if (!wall_clock)
2173 return;
2174
2175 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
2176 if (r)
2177 return;
2178
2179 if (version & 1)
2180 ++version;
2181
2182 ++version;
2183
2184 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
2185 return;
2186
2187
2188
2189
2190
2191
2192 wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
2193
2194 wc.nsec = do_div(wall_nsec, 1000000000);
2195 wc.sec = (u32)wall_nsec;
2196 wc.version = version;
2197
2198 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
2199
2200 if (sec_hi_ofs) {
2201 wc_sec_hi = wall_nsec >> 32;
2202 kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
2203 &wc_sec_hi, sizeof(wc_sec_hi));
2204 }
2205
2206 version++;
2207 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
2208}
2209
2210static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
2211 bool old_msr, bool host_initiated)
2212{
2213 struct kvm_arch *ka = &vcpu->kvm->arch;
2214
2215 if (vcpu->vcpu_id == 0 && !host_initiated) {
2216 if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
2217 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2218
2219 ka->boot_vcpu_runs_old_kvmclock = old_msr;
2220 }
2221
2222 vcpu->arch.time = system_time;
2223 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2224
2225
2226 vcpu->arch.pv_time_enabled = false;
2227 if (!(system_time & 1))
2228 return;
2229
2230 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2231 &vcpu->arch.pv_time, system_time & ~1ULL,
2232 sizeof(struct pvclock_vcpu_time_info)))
2233 vcpu->arch.pv_time_enabled = true;
2234
2235 return;
2236}
2237
2238static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
2239{
2240 do_shl32_div32(dividend, divisor);
2241 return dividend;
2242}
2243
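/*
 * Derive a fixed-point (multiplier, shift) pair that converts values
 * expressed in base_hz units into scaled_hz units, in the format consumed
 * by pvclock_scale_delta() and the guest's pvclock area.
 */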
2244static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
2245 s8 *pshift, u32 *pmultiplier)
2246{
2247 uint64_t scaled64;
2248 int32_t shift = 0;
2249 uint64_t tps64;
2250 uint32_t tps32;
2251
2252 tps64 = base_hz;
2253 scaled64 = scaled_hz;
2254 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
2255 tps64 >>= 1;
2256 shift--;
2257 }
2258
2259 tps32 = (uint32_t)tps64;
2260 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
2261 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
2262 scaled64 >>= 1;
2263 else
2264 tps32 <<= 1;
2265 shift++;
2266 }
2267
2268 *pshift = shift;
2269 *pmultiplier = div_frac(scaled64, tps32);
2270}
2271
2272#ifdef CONFIG_X86_64
2273static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
2274#endif
2275
2276static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
2277static unsigned long max_tsc_khz;
2278
2279static u32 adjust_tsc_khz(u32 khz, s32 ppm)
2280{
2281 u64 v = (u64)khz * (1000000 + ppm);
2282 do_div(v, 1000000);
2283 return v;
2284}
2285
2286static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier);
2287
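/*
 * Program the vCPU's virtual TSC frequency: use hardware TSC scaling when
 * available, otherwise fall back to catchup mode for rates above the host's
 * and refuse rates below it.
 */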
2288static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
2289{
2290 u64 ratio;
2291
2292
2293 if (!scale) {
2294 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2295 return 0;
2296 }
2297
2298
2299 if (!kvm_has_tsc_control) {
2300 if (user_tsc_khz > tsc_khz) {
2301 vcpu->arch.tsc_catchup = 1;
2302 vcpu->arch.tsc_always_catchup = 1;
2303 return 0;
2304 } else {
2305 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
2306 return -1;
2307 }
2308 }
2309
2310
2311 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
2312 user_tsc_khz, tsc_khz);
2313
2314 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
2315 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
2316 user_tsc_khz);
2317 return -1;
2318 }
2319
2320 kvm_vcpu_write_tsc_multiplier(vcpu, ratio);
2321 return 0;
2322}
2323
2324static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
2325{
2326 u32 thresh_lo, thresh_hi;
2327 int use_scaling = 0;
2328
2329
2330 if (user_tsc_khz == 0) {
2331
2332 kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
2333 return -1;
2334 }
2335
2336
2337 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
2338 &vcpu->arch.virtual_tsc_shift,
2339 &vcpu->arch.virtual_tsc_mult);
2340 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
2341
2342
2343
2344
2345
2346
2347
2348 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
2349 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
2350 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
2351 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
2352 use_scaling = 1;
2353 }
2354 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
2355}
2356
2357static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
2358{
2359 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
2360 vcpu->arch.virtual_tsc_mult,
2361 vcpu->arch.virtual_tsc_shift);
2362 tsc += vcpu->arch.this_tsc_write;
2363 return tsc;
2364}
2365
2366#ifdef CONFIG_X86_64
2367static inline int gtod_is_based_on_tsc(int mode)
2368{
2369 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
2370}
2371#endif
2372
2373static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
2374{
2375#ifdef CONFIG_X86_64
2376 bool vcpus_matched;
2377 struct kvm_arch *ka = &vcpu->kvm->arch;
2378 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2379
2380 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2381 atomic_read(&vcpu->kvm->online_vcpus));
2382
/*
 * Once the masterclock is in use, always request an update so that it stays
 * consistent.  Enabling it in the first place requires a TSC-based host
 * clocksource and matched TSCs across all online vCPUs; when both conditions
 * hold, request a masterclock update as well.
 */
2391 if (ka->use_master_clock ||
2392 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
2393 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2394
2395 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
2396 atomic_read(&vcpu->kvm->online_vcpus),
2397 ka->use_master_clock, gtod->clock.vclock_mode);
2398#endif
2399}
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
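/*
 * Multiply tsc by a fixed point number represented by ratio.
 *
 * The most significant 64-N bits (mult) of ratio represent the
 * integral part of the fixed point number; the remaining N bits
 * (frac) represent the fractional part, ie. ratio represents a fixed
 * point number (mult + frac * 2^(-N)).
 *
 * N equals kvm_tsc_scaling_ratio_frac_bits.
 */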
2411static inline u64 __scale_tsc(u64 ratio, u64 tsc)
2412{
2413 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
2414}
2415
2416u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio)
2417{
2418 u64 _tsc = tsc;
2419
2420 if (ratio != kvm_default_tsc_scaling_ratio)
2421 _tsc = __scale_tsc(ratio, tsc);
2422
2423 return _tsc;
2424}
2425EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2426
2427static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2428{
2429 u64 tsc;
2430
2431 tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio);
2432
2433 return target_tsc - tsc;
2434}
2435
2436u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2437{
2438 return vcpu->arch.l1_tsc_offset +
2439 kvm_scale_tsc(vcpu, host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
2440}
2441EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2442
2443u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
2444{
2445 u64 nested_offset;
2446
2447 if (l2_multiplier == kvm_default_tsc_scaling_ratio)
2448 nested_offset = l1_offset;
2449 else
2450 nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
2451 kvm_tsc_scaling_ratio_frac_bits);
2452
2453 nested_offset += l2_offset;
2454 return nested_offset;
2455}
2456EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
2457
2458u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
2459{
2460 if (l2_multiplier != kvm_default_tsc_scaling_ratio)
2461 return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
2462 kvm_tsc_scaling_ratio_frac_bits);
2463
2464 return l1_multiplier;
2465}
2466EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
2467
2468static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
2469{
2470 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2471 vcpu->arch.l1_tsc_offset,
2472 l1_offset);
2473
2474 vcpu->arch.l1_tsc_offset = l1_offset;
2475
/*
 * Update the effective TSC offset seen by hardware: while L2 is active,
 * fold L1's new offset together with L2's offset and scaling ratio,
 * otherwise use L1's offset directly.
 */
2481 if (is_guest_mode(vcpu))
2482 vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
2483 l1_offset,
2484 static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
2485 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2486 else
2487 vcpu->arch.tsc_offset = l1_offset;
2488
2489 static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
2490}
2491
2492static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
2493{
2494 vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
2495
2496
2497 if (is_guest_mode(vcpu))
2498 vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
2499 l1_multiplier,
2500 static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
2501 else
2502 vcpu->arch.tsc_scaling_ratio = l1_multiplier;
2503
2504 if (kvm_has_tsc_control)
2505 static_call(kvm_x86_write_tsc_multiplier)(
2506 vcpu, vcpu->arch.tsc_scaling_ratio);
2507}
2508
2509static inline bool kvm_check_tsc_unstable(void)
2510{
2511#ifdef CONFIG_X86_64
2512
2513
2514
2515
2516 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2517 return false;
2518#endif
2519 return check_tsc_unstable();
2520}
2521
2522
2523
2524
2525
2526
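/*
 * Record a guest TSC write: program the vCPU's TSC offset and track whether
 * it joins the current TSC-matching generation.  Caller must hold
 * kvm->arch.tsc_write_lock.
 */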
2527static void __kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 offset, u64 tsc,
2528 u64 ns, bool matched)
2529{
2530 struct kvm *kvm = vcpu->kvm;
2531
2532 lockdep_assert_held(&kvm->arch.tsc_write_lock);
2533
2534
2535
2536
2537
2538 kvm->arch.last_tsc_nsec = ns;
2539 kvm->arch.last_tsc_write = tsc;
2540 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2541 kvm->arch.last_tsc_offset = offset;
2542
2543 vcpu->arch.last_guest_tsc = tsc;
2544
2545 kvm_vcpu_write_tsc_offset(vcpu, offset);
2546
2547 if (!matched) {
/*
 * Periods of matched TSC writes are split into generations.  For each
 * generation, track the originally measured nanosecond time, offset and
 * written value: if the TSCs are in sync an exact offset can be matched,
 * and if not, compute_guest_tsc() can reproduce the exact software
 * computation.  These values live in the kvm->arch.cur_* fields.
 */
2557 kvm->arch.cur_tsc_generation++;
2558 kvm->arch.cur_tsc_nsec = ns;
2559 kvm->arch.cur_tsc_write = tsc;
2560 kvm->arch.cur_tsc_offset = offset;
2561 kvm->arch.nr_vcpus_matched_tsc = 0;
2562 } else if (vcpu->arch.this_tsc_generation != kvm->arch.cur_tsc_generation) {
2563 kvm->arch.nr_vcpus_matched_tsc++;
2564 }
2565
2566
2567 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2568 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2569 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2570
2571 kvm_track_tsc_matching(vcpu);
2572}
2573
2574static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
2575{
2576 struct kvm *kvm = vcpu->kvm;
2577 u64 offset, ns, elapsed;
2578 unsigned long flags;
2579 bool matched = false;
2580 bool synchronizing = false;
2581
2582 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2583 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2584 ns = get_kvmclock_base_ns();
2585 elapsed = ns - kvm->arch.last_tsc_nsec;
2586
2587 if (vcpu->arch.virtual_tsc_khz) {
2588 if (data == 0) {
/*
 * Detection of vCPU initialization: a write of 0 is treated as a
 * request to synchronize with the other vCPUs, which in particular
 * keeps kvmclock stable across CPU hotplug.
 */
2594 synchronizing = true;
2595 } else {
2596 u64 tsc_exp = kvm->arch.last_tsc_write +
2597 nsec_to_cycles(vcpu, elapsed);
2598 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2599
/*
 * A write within one second's worth of cycles of the expected value
 * is treated as an attempt to synchronize with the previous writer.
 */
2604 synchronizing = data < tsc_exp + tsc_hz &&
2605 data + tsc_hz > tsc_exp;
2606 }
2607 }
2608
/*
 * For a reliable TSC, matching offsets can be reused directly; for an
 * unstable TSC, add the elapsed time instead.  Matching offsets from the
 * start is preferable to letting the catchup code compensate later.
 */
2615 if (synchronizing &&
2616 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2617 if (!kvm_check_tsc_unstable()) {
2618 offset = kvm->arch.cur_tsc_offset;
2619 } else {
2620 u64 delta = nsec_to_cycles(vcpu, elapsed);
2621 data += delta;
2622 offset = kvm_compute_l1_tsc_offset(vcpu, data);
2623 }
2624 matched = true;
2625 }
2626
2627 __kvm_synchronize_tsc(vcpu, offset, data, ns, matched);
2628 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2629}
2630
2631static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2632 s64 adjustment)
2633{
2634 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2635 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2636}
2637
2638static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2639{
2640 if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2641 WARN_ON(adjustment < 0);
2642 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment,
2643 vcpu->arch.l1_tsc_scaling_ratio);
2644 adjust_tsc_offset_guest(vcpu, adjustment);
2645}
2646
2647#ifdef CONFIG_X86_64
2648
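/*
 * Read the host TSC, clamped to be no earlier than the timekeeper's
 * cycle_last so that derived timestamps never go backwards.
 */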
2649static u64 read_tsc(void)
2650{
2651 u64 ret = (u64)rdtsc_ordered();
2652 u64 last = pvclock_gtod_data.clock.cycle_last;
2653
2654 if (likely(ret >= last))
2655 return ret;
2656
/*
 * GCC likes to generate cmov here, but this branch is extremely
 * predictable (it's just a function of time and the likely is
 * very likely) and there's a data dependence, so force GCC
 * to generate a branch instead.  Don't barrier(): a barrier is
 * not actually needed here, and if this function ever gets
 * inlined it will generate worse code.
 */
2665 asm volatile ("");
2666 return last;
2667}
2668
2669static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2670 int *mode)
2671{
2672 long v;
2673 u64 tsc_pg_val;
2674
2675 switch (clock->vclock_mode) {
2676 case VDSO_CLOCKMODE_HVCLOCK:
2677 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2678 tsc_timestamp);
2679 if (tsc_pg_val != U64_MAX) {
2680
2681 *mode = VDSO_CLOCKMODE_HVCLOCK;
2682 v = (tsc_pg_val - clock->cycle_last) &
2683 clock->mask;
2684 } else {
2685
2686 *mode = VDSO_CLOCKMODE_NONE;
2687 }
2688 break;
2689 case VDSO_CLOCKMODE_TSC:
2690 *mode = VDSO_CLOCKMODE_TSC;
2691 *tsc_timestamp = read_tsc();
2692 v = (*tsc_timestamp - clock->cycle_last) &
2693 clock->mask;
2694 break;
2695 default:
2696 *mode = VDSO_CLOCKMODE_NONE;
2697 }
2698
2699 if (*mode == VDSO_CLOCKMODE_NONE)
2700 *tsc_timestamp = v = 0;
2701
2702 return v * clock->mult;
2703}
2704
2705static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
2706{
2707 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2708 unsigned long seq;
2709 int mode;
2710 u64 ns;
2711
2712 do {
2713 seq = read_seqcount_begin(&gtod->seq);
2714 ns = gtod->raw_clock.base_cycles;
2715 ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
2716 ns >>= gtod->raw_clock.shift;
2717 ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
2718 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2719 *t = ns;
2720
2721 return mode;
2722}
2723
2724static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2725{
2726 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2727 unsigned long seq;
2728 int mode;
2729 u64 ns;
2730
2731 do {
2732 seq = read_seqcount_begin(&gtod->seq);
2733 ts->tv_sec = gtod->wall_time_sec;
2734 ns = gtod->clock.base_cycles;
2735 ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
2736 ns >>= gtod->clock.shift;
2737 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2738
2739 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2740 ts->tv_nsec = ns;
2741
2742 return mode;
2743}
2744
2745
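/*
 * Returns true if the host is using a TSC-based clocksource, and atomically
 * samples a matching (kernel_ns, host TSC) pair from it.
 */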
2746static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2747{
2748
2749 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2750 return false;
2751
2752 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2753 tsc_timestamp));
2754}
2755
2756
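/*
 * As above, but returns host wall-clock time (CLOCK_REALTIME) together with
 * the host TSC value it corresponds to.
 */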
2757static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2758 u64 *tsc_timestamp)
2759{
2760
2761 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2762 return false;
2763
2764 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2765}
2766#endif
2767
/*
 * The kvmclock "master clock" pairs a host clocksource reading
 * (master_kernel_ns) with the TSC value observed at that instant
 * (master_cycle_now).  When the host clocksource is TSC-based and all vCPUs'
 * TSCs are matched, every vCPU derives guest time from this single snapshot,
 * which keeps the per-vCPU pvclock areas mutually consistent and monotonic.
 * pvclock_update_vm_gtod_copy() refreshes the snapshot and decides whether
 * the master clock may be used at all.
 */
2809static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2810{
2811#ifdef CONFIG_X86_64
2812 struct kvm_arch *ka = &kvm->arch;
2813 int vclock_mode;
2814 bool host_tsc_clocksource, vcpus_matched;
2815
2816 lockdep_assert_held(&kvm->arch.tsc_write_lock);
2817 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2818 atomic_read(&kvm->online_vcpus));
2819
2820
2821
2822
2823
2824 host_tsc_clocksource = kvm_get_time_and_clockread(
2825 &ka->master_kernel_ns,
2826 &ka->master_cycle_now);
2827
2828 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2829 && !ka->backwards_tsc_observed
2830 && !ka->boot_vcpu_runs_old_kvmclock;
2831
2832 if (ka->use_master_clock)
2833 atomic_set(&kvm_guest_has_master_clock, 1);
2834
2835 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2836 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2837 vcpus_matched);
2838#endif
2839}
2840
2841static void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2842{
2843 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2844}
2845
2846static void __kvm_start_pvclock_update(struct kvm *kvm)
2847{
2848 raw_spin_lock_irq(&kvm->arch.tsc_write_lock);
2849 write_seqcount_begin(&kvm->arch.pvclock_sc);
2850}
2851
2852static void kvm_start_pvclock_update(struct kvm *kvm)
2853{
2854 kvm_make_mclock_inprogress_request(kvm);
2855
2856
2857 __kvm_start_pvclock_update(kvm);
2858}
2859
2860static void kvm_end_pvclock_update(struct kvm *kvm)
2861{
2862 struct kvm_arch *ka = &kvm->arch;
2863 struct kvm_vcpu *vcpu;
2864 unsigned long i;
2865
2866 write_seqcount_end(&ka->pvclock_sc);
2867 raw_spin_unlock_irq(&ka->tsc_write_lock);
2868 kvm_for_each_vcpu(i, vcpu, kvm)
2869 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2870
2871
2872 kvm_for_each_vcpu(i, vcpu, kvm)
2873 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2874}
2875
2876static void kvm_update_masterclock(struct kvm *kvm)
2877{
2878 kvm_hv_invalidate_tsc_page(kvm);
2879 kvm_start_pvclock_update(kvm);
2880 pvclock_update_vm_gtod_copy(kvm);
2881 kvm_end_pvclock_update(kvm);
2882}
2883
2884
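/*
 * Snapshot the VM-wide kvmclock value; callers read it under ka->pvclock_sc
 * (see get_kvmclock()) so that a concurrent update forces a retry.  With a
 * stable master clock the host TSC/realtime pair is reported as well.
 */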
2885static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
2886{
2887 struct kvm_arch *ka = &kvm->arch;
2888 struct pvclock_vcpu_time_info hv_clock;
2889
2890
2891 get_cpu();
2892
2893 data->flags = 0;
2894 if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
2895#ifdef CONFIG_X86_64
2896 struct timespec64 ts;
2897
2898 if (kvm_get_walltime_and_clockread(&ts, &data->host_tsc)) {
2899 data->realtime = ts.tv_nsec + NSEC_PER_SEC * ts.tv_sec;
2900 data->flags |= KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC;
2901 } else
2902#endif
2903 data->host_tsc = rdtsc();
2904
2905 data->flags |= KVM_CLOCK_TSC_STABLE;
2906 hv_clock.tsc_timestamp = ka->master_cycle_now;
2907 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2908 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2909 &hv_clock.tsc_shift,
2910 &hv_clock.tsc_to_system_mul);
2911 data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
2912 } else {
2913 data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
2914 }
2915
2916 put_cpu();
2917}
2918
2919static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
2920{
2921 struct kvm_arch *ka = &kvm->arch;
2922 unsigned seq;
2923
2924 do {
2925 seq = read_seqcount_begin(&ka->pvclock_sc);
2926 __get_kvmclock(kvm, data);
2927 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
2928}
2929
2930u64 get_kvmclock_ns(struct kvm *kvm)
2931{
2932 struct kvm_clock_data data;
2933
2934 get_kvmclock(kvm, &data);
2935 return data.clock;
2936}
2937
2938static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
2939 struct gfn_to_hva_cache *cache,
2940 unsigned int offset)
2941{
2942 struct kvm_vcpu_arch *vcpu = &v->arch;
2943 struct pvclock_vcpu_time_info guest_hv_clock;
2944
2945 if (unlikely(kvm_read_guest_offset_cached(v->kvm, cache,
2946 &guest_hv_clock, offset, sizeof(guest_hv_clock))))
2947 return;
2948
2949
/*
 * This vCPU is paused, but it's legal for a guest to read another vCPU's
 * kvmclock, so the specification must be followed: the version field is odd
 * while the data is being modified and even once it is consistent.
 *
 * Version field updates must be kept separate from the payload write,
 * because kvm_write_guest_offset_cached() might use a "rep movs"
 * instruction, and writes within a string instruction are weakly ordered.
 * So there are three writes overall: version (odd), payload, version (even).
 */
2963 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2964
2965 if (guest_hv_clock.version & 1)
2966 ++guest_hv_clock.version;
2967
2968 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2969 kvm_write_guest_offset_cached(v->kvm, cache,
2970 &vcpu->hv_clock, offset,
2971 sizeof(vcpu->hv_clock.version));
2972
2973 smp_wmb();
2974
2975
2976 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2977
2978 if (vcpu->pvclock_set_guest_stopped_request) {
2979 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2980 vcpu->pvclock_set_guest_stopped_request = false;
2981 }
2982
2983 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2984
2985 kvm_write_guest_offset_cached(v->kvm, cache,
2986 &vcpu->hv_clock, offset,
2987 sizeof(vcpu->hv_clock));
2988
2989 smp_wmb();
2990
2991 vcpu->hv_clock.version++;
2992 kvm_write_guest_offset_cached(v->kvm, cache,
2993 &vcpu->hv_clock, offset,
2994 sizeof(vcpu->hv_clock.version));
2995}
2996
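/*
 * Refresh this vCPU's pvclock data (kvmclock plus the Xen and Hyper-V
 * variants): pick up the master clock or a fresh (TSC, kernel_ns) sample,
 * apply TSC catchup if needed, and publish the result to the guest.
 */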
2997static int kvm_guest_time_update(struct kvm_vcpu *v)
2998{
2999 unsigned long flags, tgt_tsc_khz;
3000 unsigned seq;
3001 struct kvm_vcpu_arch *vcpu = &v->arch;
3002 struct kvm_arch *ka = &v->kvm->arch;
3003 s64 kernel_ns;
3004 u64 tsc_timestamp, host_tsc;
3005 u8 pvclock_flags;
3006 bool use_master_clock;
3007
3008 kernel_ns = 0;
3009 host_tsc = 0;
3010
3011
3012
3013
3014
3015 do {
3016 seq = read_seqcount_begin(&ka->pvclock_sc);
3017 use_master_clock = ka->use_master_clock;
3018 if (use_master_clock) {
3019 host_tsc = ka->master_cycle_now;
3020 kernel_ns = ka->master_kernel_ns;
3021 }
3022 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
3023
3024
3025 local_irq_save(flags);
3026 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
3027 if (unlikely(tgt_tsc_khz == 0)) {
3028 local_irq_restore(flags);
3029 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3030 return 1;
3031 }
3032 if (!use_master_clock) {
3033 host_tsc = rdtsc();
3034 kernel_ns = get_kvmclock_base_ns();
3035 }
3036
3037 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
3038
/*
 * The TSC may have to be caught up to match elapsed wall clock time,
 * even when kvmclock is used, for two reasons:
 *   1) the CPU could have been running below the maximum TSC rate;
 *   2) broken TSC compensation resets the base at each vCPU entry to
 *      avoid unknown leaps of TSC even when running again on the same
 *      CPU, which can make apparent elapsed time disappear and the
 *      guest stand still or run very slowly.
 */
3049 if (vcpu->tsc_catchup) {
3050 u64 tsc = compute_guest_tsc(v, kernel_ns);
3051 if (tsc > tsc_timestamp) {
3052 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
3053 tsc_timestamp = tsc;
3054 }
3055 }
3056
3057 local_irq_restore(flags);
3058
3059
3060
3061 if (kvm_has_tsc_control)
3062 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz,
3063 v->arch.l1_tsc_scaling_ratio);
3064
3065 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
3066 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
3067 &vcpu->hv_clock.tsc_shift,
3068 &vcpu->hv_clock.tsc_to_system_mul);
3069 vcpu->hw_tsc_khz = tgt_tsc_khz;
3070 }
3071
3072 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
3073 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
3074 vcpu->last_guest_tsc = tsc_timestamp;
3075
3076
3077 pvclock_flags = 0;
3078 if (use_master_clock)
3079 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
3080
3081 vcpu->hv_clock.flags = pvclock_flags;
3082
3083 if (vcpu->pv_time_enabled)
3084 kvm_setup_pvclock_page(v, &vcpu->pv_time, 0);
3085 if (vcpu->xen.vcpu_info_set)
3086 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
3087 offsetof(struct compat_vcpu_info, time));
3088 if (vcpu->xen.vcpu_time_info_set)
3089 kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
3090 if (!v->vcpu_idx)
3091 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
3092 return 0;
3093}
3094
3095
/*
 * kvmclock updates which are isolated to a given vcpu, such as vcpu->cpu
 * migration, should not allow system_timestamp from the rest of the vcpus
 * to remain static.  Otherwise ntp frequency correction applies to one
 * vcpu's system_timestamp but not to the others.
 *
 * So in those cases, request a kvmclock update for all vcpus.
 *
 * These requests need to be rate-limited, though, as they can considerably
 * slow guests that have a large number of vcpus.  The time for a remote
 * vcpu to update its kvmclock is bound by the delay used to rate-limit the
 * updates.
 */
3109#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
3110
3111static void kvmclock_update_fn(struct work_struct *work)
3112{
3113 unsigned long i;
3114 struct delayed_work *dwork = to_delayed_work(work);
3115 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3116 kvmclock_update_work);
3117 struct kvm *kvm = container_of(ka, struct kvm, arch);
3118 struct kvm_vcpu *vcpu;
3119
3120 kvm_for_each_vcpu(i, vcpu, kvm) {
3121 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3122 kvm_vcpu_kick(vcpu);
3123 }
3124}
3125
3126static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
3127{
3128 struct kvm *kvm = v->kvm;
3129
3130 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3131 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
3132 KVMCLOCK_UPDATE_DELAY);
3133}
3134
3135#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
3136
3137static void kvmclock_sync_fn(struct work_struct *work)
3138{
3139 struct delayed_work *dwork = to_delayed_work(work);
3140 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
3141 kvmclock_sync_work);
3142 struct kvm *kvm = container_of(ka, struct kvm, arch);
3143
3144 if (!kvmclock_periodic_sync)
3145 return;
3146
3147 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
3148 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
3149 KVMCLOCK_SYNC_PERIOD);
3150}
3151
3152
3153
3154
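/*
 * On AMD and Hygon parts, HWCR[McStatusWrEn] (bit 18) controls whether
 * guest writes to the MCi_STATUS registers are permitted.
 */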
3155static bool can_set_mci_status(struct kvm_vcpu *vcpu)
3156{
3157
3158 if (guest_cpuid_is_amd_or_hygon(vcpu))
3159 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
3160
3161 return false;
3162}
3163
3164static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3165{
3166 u64 mcg_cap = vcpu->arch.mcg_cap;
3167 unsigned bank_num = mcg_cap & 0xff;
3168 u32 msr = msr_info->index;
3169 u64 data = msr_info->data;
3170
3171 switch (msr) {
3172 case MSR_IA32_MCG_STATUS:
3173 vcpu->arch.mcg_status = data;
3174 break;
3175 case MSR_IA32_MCG_CTL:
3176 if (!(mcg_cap & MCG_CTL_P) &&
3177 (data || !msr_info->host_initiated))
3178 return 1;
3179 if (data != 0 && data != ~(u64)0)
3180 return 1;
3181 vcpu->arch.mcg_ctl = data;
3182 break;
3183 default:
3184 if (msr >= MSR_IA32_MC0_CTL &&
3185 msr < MSR_IA32_MCx_CTL(bank_num)) {
3186 u32 offset = array_index_nospec(
3187 msr - MSR_IA32_MC0_CTL,
3188 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
/*
 * Only 0 or all 1s can be written to IA32_MCi_CTL; other values are
 * architecturally undefined.  However, some Linux kernels clear bit 10
 * in bank 4 to work around a BIOS/GART TLB issue on AMD K8s, so allow
 * bit 10 to be clear when setting all other bits in order to avoid an
 * uncaught #GP in the guest.
 */
3195 if ((offset & 0x3) == 0 &&
3196 data != 0 && (data | (1 << 10)) != ~(u64)0)
3197 return -1;
3198
3199
3200 if (!msr_info->host_initiated &&
3201 (offset & 0x3) == 1 && data != 0) {
3202 if (!can_set_mci_status(vcpu))
3203 return -1;
3204 }
3205
3206 vcpu->arch.mce_banks[offset] = data;
3207 break;
3208 }
3209 return 1;
3210 }
3211 return 0;
3212}
3213
3214static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
3215{
3216 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
3217
3218 return (vcpu->arch.apf.msr_en_val & mask) == mask;
3219}
3220
3221static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
3222{
3223 gpa_t gpa = data & ~0x3f;
3224
3225
3226 if (data & 0x30)
3227 return 1;
3228
3229 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
3230 (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
3231 return 1;
3232
3233 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
3234 (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
3235 return 1;
3236
3237 if (!lapic_in_kernel(vcpu))
3238 return data ? 1 : 0;
3239
3240 vcpu->arch.apf.msr_en_val = data;
3241
3242 if (!kvm_pv_async_pf_enabled(vcpu)) {
3243 kvm_clear_async_pf_completion_queue(vcpu);
3244 kvm_async_pf_hash_reset(vcpu);
3245 return 0;
3246 }
3247
3248 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
3249 sizeof(u64)))
3250 return 1;
3251
3252 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
3253 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
3254
3255 kvm_async_pf_wakeup_all(vcpu);
3256
3257 return 0;
3258}
3259
3260static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
3261{
3262
3263 if (data >> 8)
3264 return 1;
3265
3266 if (!lapic_in_kernel(vcpu))
3267 return 1;
3268
3269 vcpu->arch.apf.msr_int_val = data;
3270
3271 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
3272
3273 return 0;
3274}
3275
3276static void kvmclock_reset(struct kvm_vcpu *vcpu)
3277{
3278 vcpu->arch.pv_time_enabled = false;
3279 vcpu->arch.time = 0;
3280}
3281
3282static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
3283{
3284 ++vcpu->stat.tlb_flush;
3285 static_call(kvm_x86_tlb_flush_all)(vcpu);
3286}
3287
3288static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
3289{
3290 ++vcpu->stat.tlb_flush;
3291
3292 if (!tdp_enabled) {
/*
 * A TLB flush on behalf of the guest is equivalent to INVPCID(all)
 * or toggling CR4.PGE, which requires a forced sync of the shadow
 * page tables.  Ensure all the roots are synced and the hardware
 * guest TLB is clean.
 */
3299 kvm_mmu_sync_roots(vcpu);
3300 kvm_mmu_sync_prev_roots(vcpu);
3301 }
3302
3303 static_call(kvm_x86_tlb_flush_guest)(vcpu);
3304}
3305
3306
3307static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
3308{
3309 ++vcpu->stat.tlb_flush;
3310 static_call(kvm_x86_tlb_flush_current)(vcpu);
3311}
3312
3313
3314
3315
3316
3317
3318
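/*
 * Service "local" TLB flush requests, i.e. flushes scoped to the current MMU
 * context.  Besides the generic handling in vcpu_enter_guest(), these also
 * need to be serviced before nested VM-Enter/VM-Exit transitions.
 */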
3319void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
3320{
3321 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
3322 kvm_vcpu_flush_tlb_current(vcpu);
3323
3324 if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
3325 kvm_vcpu_flush_tlb_guest(vcpu);
3326}
3327EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
3328
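/*
 * Update the guest's steal-time record (or the Xen runstate area), following
 * the version protocol and honouring any TLB-flush request the guest left in
 * the preempted field.
 */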
3329static void record_steal_time(struct kvm_vcpu *vcpu)
3330{
3331 struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
3332 struct kvm_steal_time __user *st;
3333 struct kvm_memslots *slots;
3334 u64 steal;
3335 u32 version;
3336
3337 if (kvm_xen_msr_enabled(vcpu->kvm)) {
3338 kvm_xen_runstate_set_running(vcpu);
3339 return;
3340 }
3341
3342 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3343 return;
3344
3345 if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
3346 return;
3347
3348 slots = kvm_memslots(vcpu->kvm);
3349
3350 if (unlikely(slots->generation != ghc->generation ||
3351 kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
3352 gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
3353
3354
3355 BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
3356
3357 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
3358 kvm_is_error_hva(ghc->hva) || !ghc->memslot)
3359 return;
3360 }
3361
3362 st = (struct kvm_steal_time __user *)ghc->hva;
3363
3364
3365
3366
3367 if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
3368 u8 st_preempted = 0;
3369 int err = -EFAULT;
3370
3371 if (!user_access_begin(st, sizeof(*st)))
3372 return;
3373
3374 asm volatile("1: xchgb %0, %2\n"
3375 "xor %1, %1\n"
3376 "2:\n"
3377 _ASM_EXTABLE_UA(1b, 2b)
3378 : "+q" (st_preempted),
3379 "+&r" (err),
3380 "+m" (st->preempted));
3381 if (err)
3382 goto out;
3383
3384 user_access_end();
3385
3386 vcpu->arch.st.preempted = 0;
3387
3388 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
3389 st_preempted & KVM_VCPU_FLUSH_TLB);
3390 if (st_preempted & KVM_VCPU_FLUSH_TLB)
3391 kvm_vcpu_flush_tlb_guest(vcpu);
3392
3393 if (!user_access_begin(st, sizeof(*st)))
3394 goto dirty;
3395 } else {
3396 if (!user_access_begin(st, sizeof(*st)))
3397 return;
3398
3399 unsafe_put_user(0, &st->preempted, out);
3400 vcpu->arch.st.preempted = 0;
3401 }
3402
3403 unsafe_get_user(version, &st->version, out);
3404 if (version & 1)
3405 version += 1;
3406
3407 version += 1;
3408 unsafe_put_user(version, &st->version, out);
3409
3410 smp_wmb();
3411
3412 unsafe_get_user(steal, &st->steal, out);
3413 steal += current->sched_info.run_delay -
3414 vcpu->arch.st.last_steal;
3415 vcpu->arch.st.last_steal = current->sched_info.run_delay;
3416 unsafe_put_user(steal, &st->steal, out);
3417
3418 version += 1;
3419 unsafe_put_user(version, &st->version, out);
3420
3421 out:
3422 user_access_end();
3423 dirty:
3424 mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
3425}
3426
3427int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3428{
3429 bool pr = false;
3430 u32 msr = msr_info->index;
3431 u64 data = msr_info->data;
3432
3433 if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
3434 return kvm_xen_write_hypercall_page(vcpu, data);
3435
3436 switch (msr) {
3437 case MSR_AMD64_NB_CFG:
3438 case MSR_IA32_UCODE_WRITE:
3439 case MSR_VM_HSAVE_PA:
3440 case MSR_AMD64_PATCH_LOADER:
3441 case MSR_AMD64_BU_CFG2:
3442 case MSR_AMD64_DC_CFG:
3443 case MSR_F15H_EX_CFG:
3444 break;
3445
3446 case MSR_IA32_UCODE_REV:
3447 if (msr_info->host_initiated)
3448 vcpu->arch.microcode_version = data;
3449 break;
3450 case MSR_IA32_ARCH_CAPABILITIES:
3451 if (!msr_info->host_initiated)
3452 return 1;
3453 vcpu->arch.arch_capabilities = data;
3454 break;
3455 case MSR_IA32_PERF_CAPABILITIES: {
3456 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
3457
3458 if (!msr_info->host_initiated)
3459 return 1;
3460 if (kvm_get_msr_feature(&msr_ent))
3461 return 1;
3462 if (data & ~msr_ent.data)
3463 return 1;
3464
3465 vcpu->arch.perf_capabilities = data;
3466
3467 return 0;
3468 }
3469 case MSR_EFER:
3470 return set_efer(vcpu, msr_info);
3471 case MSR_K7_HWCR:
3472 data &= ~(u64)0x40; /* ignore flush filter disable */
3473 data &= ~(u64)0x100; /* ignore ignne emulation enable */
3474 data &= ~(u64)0x8; /* ignore TLB cache disable */
3475
/* Handle McStatusWrEn */
3477 if (data == BIT_ULL(18)) {
3478 vcpu->arch.msr_hwcr = data;
3479 } else if (data != 0) {
3480 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
3481 data);
3482 return 1;
3483 }
3484 break;
3485 case MSR_FAM10H_MMIO_CONF_BASE:
3486 if (data != 0) {
3487 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
3488 "0x%llx\n", data);
3489 return 1;
3490 }
3491 break;
3492 case 0x200 ... 0x2ff:
3493 return kvm_mtrr_set_msr(vcpu, msr, data);
3494 case MSR_IA32_APICBASE:
3495 return kvm_set_apic_base(vcpu, msr_info);
3496 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3497 return kvm_x2apic_msr_write(vcpu, msr, data);
3498 case MSR_IA32_TSC_DEADLINE:
3499 kvm_set_lapic_tscdeadline_msr(vcpu, data);
3500 break;
3501 case MSR_IA32_TSC_ADJUST:
3502 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
3503 if (!msr_info->host_initiated) {
3504 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
3505 adjust_tsc_offset_guest(vcpu, adj);
3506
3507
3508
3509 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3510 }
3511 vcpu->arch.ia32_tsc_adjust_msr = data;
3512 }
3513 break;
3514 case MSR_IA32_MISC_ENABLE:
3515 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
3516 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
3517 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
3518 return 1;
3519 vcpu->arch.ia32_misc_enable_msr = data;
3520 kvm_update_cpuid_runtime(vcpu);
3521 } else {
3522 vcpu->arch.ia32_misc_enable_msr = data;
3523 }
3524 break;
3525 case MSR_IA32_SMBASE:
3526 if (!msr_info->host_initiated)
3527 return 1;
3528 vcpu->arch.smbase = data;
3529 break;
3530 case MSR_IA32_POWER_CTL:
3531 vcpu->arch.msr_ia32_power_ctl = data;
3532 break;
3533 case MSR_IA32_TSC:
3534 if (msr_info->host_initiated) {
3535 kvm_synchronize_tsc(vcpu, data);
3536 } else {
3537 u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
3538 adjust_tsc_offset_guest(vcpu, adj);
3539 vcpu->arch.ia32_tsc_adjust_msr += adj;
3540 }
3541 break;
3542 case MSR_IA32_XSS:
3543 if (!msr_info->host_initiated &&
3544 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3545 return 1;
/*
 * KVM supports exposing PT to the guest, but does not support
 * IA32_XSS[bit 8]; guests have to use RDMSR/WRMSR rather than
 * XSAVES/XRSTORS to save/restore the PT MSRs.
 */
3551 if (data & ~supported_xss)
3552 return 1;
3553 vcpu->arch.ia32_xss = data;
3554 kvm_update_cpuid_runtime(vcpu);
3555 break;
3556 case MSR_SMI_COUNT:
3557 if (!msr_info->host_initiated)
3558 return 1;
3559 vcpu->arch.smi_count = data;
3560 break;
3561 case MSR_KVM_WALL_CLOCK_NEW:
3562 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3563 return 1;
3564
3565 vcpu->kvm->arch.wall_clock = data;
3566 kvm_write_wall_clock(vcpu->kvm, data, 0);
3567 break;
3568 case MSR_KVM_WALL_CLOCK:
3569 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3570 return 1;
3571
3572 vcpu->kvm->arch.wall_clock = data;
3573 kvm_write_wall_clock(vcpu->kvm, data, 0);
3574 break;
3575 case MSR_KVM_SYSTEM_TIME_NEW:
3576 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3577 return 1;
3578
3579 kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
3580 break;
3581 case MSR_KVM_SYSTEM_TIME:
3582 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3583 return 1;
3584
3585 kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
3586 break;
3587 case MSR_KVM_ASYNC_PF_EN:
3588 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3589 return 1;
3590
3591 if (kvm_pv_enable_async_pf(vcpu, data))
3592 return 1;
3593 break;
3594 case MSR_KVM_ASYNC_PF_INT:
3595 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3596 return 1;
3597
3598 if (kvm_pv_enable_async_pf_int(vcpu, data))
3599 return 1;
3600 break;
3601 case MSR_KVM_ASYNC_PF_ACK:
3602 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3603 return 1;
3604 if (data & 0x1) {
3605 vcpu->arch.apf.pageready_pending = false;
3606 kvm_check_async_pf_completion(vcpu);
3607 }
3608 break;
3609 case MSR_KVM_STEAL_TIME:
3610 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3611 return 1;
3612
3613 if (unlikely(!sched_info_on()))
3614 return 1;
3615
3616 if (data & KVM_STEAL_RESERVED_MASK)
3617 return 1;
3618
3619 vcpu->arch.st.msr_val = data;
3620
3621 if (!(data & KVM_MSR_ENABLED))
3622 break;
3623
3624 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3625
3626 break;
3627 case MSR_KVM_PV_EOI_EN:
3628 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3629 return 1;
3630
3631 if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
3632 return 1;
3633 break;
3634
3635 case MSR_KVM_POLL_CONTROL:
3636 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3637 return 1;
3638
3639
3640 if (data & (-1ULL << 1))
3641 return 1;
3642
3643 vcpu->arch.msr_kvm_poll_control = data;
3644 break;
3645
3646 case MSR_IA32_MCG_CTL:
3647 case MSR_IA32_MCG_STATUS:
3648 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3649 return set_msr_mce(vcpu, msr_info);
3650
3651 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3652 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3653 pr = true;
3654 fallthrough;
3655 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3656 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3657 if (kvm_pmu_is_valid_msr(vcpu, msr))
3658 return kvm_pmu_set_msr(vcpu, msr_info);
3659
3660 if (pr || data != 0)
3661 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3662 "0x%x data 0x%llx\n", msr, data);
3663 break;
3664 case MSR_K7_CLK_CTL:
/*
 * Ignore all writes to this no-longer-documented MSR.  Writes are only
 * relevant for old K7 processors, all pre-dating SVM, but are a
 * recommended workaround from AMD for those chips, so accept them
 * silently rather than injecting #GP.
 */
3673 break;
3674 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3675 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3676 case HV_X64_MSR_SYNDBG_OPTIONS:
3677 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3678 case HV_X64_MSR_CRASH_CTL:
3679 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3680 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3681 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3682 case HV_X64_MSR_TSC_EMULATION_STATUS:
3683 return kvm_hv_set_msr_common(vcpu, msr, data,
3684 msr_info->host_initiated);
3685 case MSR_IA32_BBL_CR_CTL3:
3686
3687
3688
3689 if (report_ignored_msrs)
3690 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3691 msr, data);
3692 break;
3693 case MSR_AMD64_OSVW_ID_LENGTH:
3694 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3695 return 1;
3696 vcpu->arch.osvw.length = data;
3697 break;
3698 case MSR_AMD64_OSVW_STATUS:
3699 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3700 return 1;
3701 vcpu->arch.osvw.status = data;
3702 break;
3703 case MSR_PLATFORM_INFO:
3704 if (!msr_info->host_initiated ||
3705 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3706 cpuid_fault_enabled(vcpu)))
3707 return 1;
3708 vcpu->arch.msr_platform_info = data;
3709 break;
3710 case MSR_MISC_FEATURES_ENABLES:
3711 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3712 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3713 !supports_cpuid_fault(vcpu)))
3714 return 1;
3715 vcpu->arch.msr_misc_features_enables = data;
3716 break;
3717#ifdef CONFIG_X86_64
3718 case MSR_IA32_XFD:
3719 if (!msr_info->host_initiated &&
3720 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
3721 return 1;
3722
3723 if (data & ~kvm_guest_supported_xfd(vcpu))
3724 return 1;
3725
3726 fpu_update_guest_xfd(&vcpu->arch.guest_fpu, data);
3727 break;
3728 case MSR_IA32_XFD_ERR:
3729 if (!msr_info->host_initiated &&
3730 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
3731 return 1;
3732
3733 if (data & ~kvm_guest_supported_xfd(vcpu))
3734 return 1;
3735
3736 vcpu->arch.guest_fpu.xfd_err = data;
3737 break;
3738#endif
3739 default:
3740 if (kvm_pmu_is_valid_msr(vcpu, msr))
3741 return kvm_pmu_set_msr(vcpu, msr_info);
3742 return KVM_MSR_RET_INVALID;
3743 }
3744 return 0;
3745}
3746EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3747
3748static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3749{
3750 u64 data;
3751 u64 mcg_cap = vcpu->arch.mcg_cap;
3752 unsigned bank_num = mcg_cap & 0xff;
3753
3754 switch (msr) {
3755 case MSR_IA32_P5_MC_ADDR:
3756 case MSR_IA32_P5_MC_TYPE:
3757 data = 0;
3758 break;
3759 case MSR_IA32_MCG_CAP:
3760 data = vcpu->arch.mcg_cap;
3761 break;
3762 case MSR_IA32_MCG_CTL:
3763 if (!(mcg_cap & MCG_CTL_P) && !host)
3764 return 1;
3765 data = vcpu->arch.mcg_ctl;
3766 break;
3767 case MSR_IA32_MCG_STATUS:
3768 data = vcpu->arch.mcg_status;
3769 break;
3770 default:
3771 if (msr >= MSR_IA32_MC0_CTL &&
3772 msr < MSR_IA32_MCx_CTL(bank_num)) {
3773 u32 offset = array_index_nospec(
3774 msr - MSR_IA32_MC0_CTL,
3775 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3776
3777 data = vcpu->arch.mce_banks[offset];
3778 break;
3779 }
3780 return 1;
3781 }
3782 *pdata = data;
3783 return 0;
3784}
3785
3786int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3787{
3788 switch (msr_info->index) {
3789 case MSR_IA32_PLATFORM_ID:
3790 case MSR_IA32_EBL_CR_POWERON:
3791 case MSR_IA32_LASTBRANCHFROMIP:
3792 case MSR_IA32_LASTBRANCHTOIP:
3793 case MSR_IA32_LASTINTFROMIP:
3794 case MSR_IA32_LASTINTTOIP:
3795 case MSR_AMD64_SYSCFG:
3796 case MSR_K8_TSEG_ADDR:
3797 case MSR_K8_TSEG_MASK:
3798 case MSR_VM_HSAVE_PA:
3799 case MSR_K8_INT_PENDING_MSG:
3800 case MSR_AMD64_NB_CFG:
3801 case MSR_FAM10H_MMIO_CONF_BASE:
3802 case MSR_AMD64_BU_CFG2:
3803 case MSR_IA32_PERF_CTL:
3804 case MSR_AMD64_DC_CFG:
3805 case MSR_F15H_EX_CFG:
3806
3807
3808
3809
3810
3811
3812 case MSR_RAPL_POWER_UNIT:
3813 case MSR_PP0_ENERGY_STATUS:
3814 case MSR_PP1_ENERGY_STATUS:
3815 case MSR_PKG_ENERGY_STATUS:
3816 case MSR_DRAM_ENERGY_STATUS:
3817 msr_info->data = 0;
3818 break;
3819 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3820 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3821 return kvm_pmu_get_msr(vcpu, msr_info);
3822 if (!msr_info->host_initiated)
3823 return 1;
3824 msr_info->data = 0;
3825 break;
3826 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3827 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3828 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3829 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3830 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3831 return kvm_pmu_get_msr(vcpu, msr_info);
3832 msr_info->data = 0;
3833 break;
3834 case MSR_IA32_UCODE_REV:
3835 msr_info->data = vcpu->arch.microcode_version;
3836 break;
3837 case MSR_IA32_ARCH_CAPABILITIES:
3838 if (!msr_info->host_initiated &&
3839 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3840 return 1;
3841 msr_info->data = vcpu->arch.arch_capabilities;
3842 break;
3843 case MSR_IA32_PERF_CAPABILITIES:
3844 if (!msr_info->host_initiated &&
3845 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3846 return 1;
3847 msr_info->data = vcpu->arch.perf_capabilities;
3848 break;
3849 case MSR_IA32_POWER_CTL:
3850 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3851 break;
3852 case MSR_IA32_TSC: {
/*
 * The Intel SDM states that reading MSR_IA32_TSC adds the TSC offset
 * even when the read is not intercepted; AMD's manual doesn't state
 * this explicitly but the behavior appears to be the same.
 *
 * On userspace (host-initiated) reads and writes, however, L1's TSC
 * value is returned unconditionally to keep migration backwards
 * compatible.
 */
3862 u64 offset, ratio;
3863
3864 if (msr_info->host_initiated) {
3865 offset = vcpu->arch.l1_tsc_offset;
3866 ratio = vcpu->arch.l1_tsc_scaling_ratio;
3867 } else {
3868 offset = vcpu->arch.tsc_offset;
3869 ratio = vcpu->arch.tsc_scaling_ratio;
3870 }
3871
3872 msr_info->data = kvm_scale_tsc(vcpu, rdtsc(), ratio) + offset;
3873 break;
3874 }
3875 case MSR_MTRRcap:
3876 case 0x200 ... 0x2ff:
3877 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3878 case 0xcd: /* fsb frequency */
3879 msr_info->data = 3;
3880 break;
/*
 * MSR_EBC_FREQUENCY_ID: report a conservative core-to-bus frequency
 * ratio of 1 (bits 31:24) so that guests which consult this legacy
 * MSR do not end up dividing or multiplying by zero.
 */
3892 case MSR_EBC_FREQUENCY_ID:
3893 msr_info->data = 1 << 24;
3894 break;
3895 case MSR_IA32_APICBASE:
3896 msr_info->data = kvm_get_apic_base(vcpu);
3897 break;
3898 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3899 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3900 case MSR_IA32_TSC_DEADLINE:
3901 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3902 break;
3903 case MSR_IA32_TSC_ADJUST:
3904 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3905 break;
3906 case MSR_IA32_MISC_ENABLE:
3907 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3908 break;
3909 case MSR_IA32_SMBASE:
3910 if (!msr_info->host_initiated)
3911 return 1;
3912 msr_info->data = vcpu->arch.smbase;
3913 break;
3914 case MSR_SMI_COUNT:
3915 msr_info->data = vcpu->arch.smi_count;
3916 break;
3917 case MSR_IA32_PERF_STATUS:
/* TSC increment by tick */
3919 msr_info->data = 1000ULL;
/* CPU multiplier */
3921 msr_info->data |= (((uint64_t)4ULL) << 40);
3922 break;
3923 case MSR_EFER:
3924 msr_info->data = vcpu->arch.efer;
3925 break;
3926 case MSR_KVM_WALL_CLOCK:
3927 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3928 return 1;
3929
3930 msr_info->data = vcpu->kvm->arch.wall_clock;
3931 break;
3932 case MSR_KVM_WALL_CLOCK_NEW:
3933 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3934 return 1;
3935
3936 msr_info->data = vcpu->kvm->arch.wall_clock;
3937 break;
3938 case MSR_KVM_SYSTEM_TIME:
3939 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3940 return 1;
3941
3942 msr_info->data = vcpu->arch.time;
3943 break;
3944 case MSR_KVM_SYSTEM_TIME_NEW:
3945 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3946 return 1;
3947
3948 msr_info->data = vcpu->arch.time;
3949 break;
3950 case MSR_KVM_ASYNC_PF_EN:
3951 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3952 return 1;
3953
3954 msr_info->data = vcpu->arch.apf.msr_en_val;
3955 break;
3956 case MSR_KVM_ASYNC_PF_INT:
3957 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3958 return 1;
3959
3960 msr_info->data = vcpu->arch.apf.msr_int_val;
3961 break;
3962 case MSR_KVM_ASYNC_PF_ACK:
3963 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3964 return 1;
3965
3966 msr_info->data = 0;
3967 break;
3968 case MSR_KVM_STEAL_TIME:
3969 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3970 return 1;
3971
3972 msr_info->data = vcpu->arch.st.msr_val;
3973 break;
3974 case MSR_KVM_PV_EOI_EN:
3975 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3976 return 1;
3977
3978 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3979 break;
3980 case MSR_KVM_POLL_CONTROL:
3981 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3982 return 1;
3983
3984 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3985 break;
3986 case MSR_IA32_P5_MC_ADDR:
3987 case MSR_IA32_P5_MC_TYPE:
3988 case MSR_IA32_MCG_CAP:
3989 case MSR_IA32_MCG_CTL:
3990 case MSR_IA32_MCG_STATUS:
3991 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3992 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3993 msr_info->host_initiated);
3994 case MSR_IA32_XSS:
3995 if (!msr_info->host_initiated &&
3996 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3997 return 1;
3998 msr_info->data = vcpu->arch.ia32_xss;
3999 break;
4000 case MSR_K7_CLK_CTL:
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010 msr_info->data = 0x20000000;
4011 break;
4012 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
4013 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
4014 case HV_X64_MSR_SYNDBG_OPTIONS:
4015 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
4016 case HV_X64_MSR_CRASH_CTL:
4017 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
4018 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
4019 case HV_X64_MSR_TSC_EMULATION_CONTROL:
4020 case HV_X64_MSR_TSC_EMULATION_STATUS:
4021 return kvm_hv_get_msr_common(vcpu,
4022 msr_info->index, &msr_info->data,
4023 msr_info->host_initiated);
4024 case MSR_IA32_BBL_CR_CTL3:
/*
 * This legacy MSR exists but isn't fully documented in current
 * silicon.  It is, however, accessed by WinXP in very narrow
 * scenarios, so return a best-effort coherent dummy value (L2 cache
 * control register 3 contents) in case the guest interprets the rest
 * of the register.
 */
4035 msr_info->data = 0xbe702111;
4036 break;
4037 case MSR_AMD64_OSVW_ID_LENGTH:
4038 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
4039 return 1;
4040 msr_info->data = vcpu->arch.osvw.length;
4041 break;
4042 case MSR_AMD64_OSVW_STATUS:
4043 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
4044 return 1;
4045 msr_info->data = vcpu->arch.osvw.status;
4046 break;
4047 case MSR_PLATFORM_INFO:
4048 if (!msr_info->host_initiated &&
4049 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
4050 return 1;
4051 msr_info->data = vcpu->arch.msr_platform_info;
4052 break;
4053 case MSR_MISC_FEATURES_ENABLES:
4054 msr_info->data = vcpu->arch.msr_misc_features_enables;
4055 break;
4056 case MSR_K7_HWCR:
4057 msr_info->data = vcpu->arch.msr_hwcr;
4058 break;
4059#ifdef CONFIG_X86_64
4060 case MSR_IA32_XFD:
4061 if (!msr_info->host_initiated &&
4062 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
4063 return 1;
4064
4065 msr_info->data = vcpu->arch.guest_fpu.fpstate->xfd;
4066 break;
4067 case MSR_IA32_XFD_ERR:
4068 if (!msr_info->host_initiated &&
4069 !guest_cpuid_has(vcpu, X86_FEATURE_XFD))
4070 return 1;
4071
4072 msr_info->data = vcpu->arch.guest_fpu.xfd_err;
4073 break;
4074#endif
4075 default:
4076 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
4077 return kvm_pmu_get_msr(vcpu, msr_info);
4078 return KVM_MSR_RET_INVALID;
4079 }
4080 return 0;
4081}
4082EXPORT_SYMBOL_GPL(kvm_get_msr_common);
4083
4084
4085
4086
4087
4088
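/*
 * Read or write a bunch of MSRs.  All parameters are kernel addresses;
 * returns the number of MSRs processed.
 */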
4089static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
4090 struct kvm_msr_entry *entries,
4091 int (*do_msr)(struct kvm_vcpu *vcpu,
4092 unsigned index, u64 *data))
4093{
4094 int i;
4095
4096 for (i = 0; i < msrs->nmsrs; ++i)
4097 if (do_msr(vcpu, entries[i].index, &entries[i].data))
4098 break;
4099
4100 return i;
4101}
4102
4103
4104
4105
4106
4107
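/*
 * Read or write a bunch of MSRs.  Parameters are user addresses; returns
 * the number of MSRs processed, or a negative errno on failure.
 */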
4108static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
4109 int (*do_msr)(struct kvm_vcpu *vcpu,
4110 unsigned index, u64 *data),
4111 int writeback)
4112{
4113 struct kvm_msrs msrs;
4114 struct kvm_msr_entry *entries;
4115 int r, n;
4116 unsigned size;
4117
4118 r = -EFAULT;
4119 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
4120 goto out;
4121
4122 r = -E2BIG;
4123 if (msrs.nmsrs >= MAX_IO_MSRS)
4124 goto out;
4125
4126 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
4127 entries = memdup_user(user_msrs->entries, size);
4128 if (IS_ERR(entries)) {
4129 r = PTR_ERR(entries);
4130 goto out;
4131 }
4132
4133 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
4134 if (r < 0)
4135 goto out_free;
4136
4137 r = -EFAULT;
4138 if (writeback && copy_to_user(user_msrs->entries, entries, size))
4139 goto out_free;
4140
4141 r = n;
4142
4143out_free:
4144 kfree(entries);
4145out:
4146 return r;
4147}
4148
4149static inline bool kvm_can_mwait_in_guest(void)
4150{
4151 return boot_cpu_has(X86_FEATURE_MWAIT) &&
4152 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
4153 boot_cpu_has(X86_FEATURE_ARAT);
4154}
4155
4156static int kvm_ioctl_get_supported_hv_cpuid(struct kvm_vcpu *vcpu,
4157 struct kvm_cpuid2 __user *cpuid_arg)
4158{
4159 struct kvm_cpuid2 cpuid;
4160 int r;
4161
4162 r = -EFAULT;
4163 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4164 return r;
4165
4166 r = kvm_get_hv_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4167 if (r)
4168 return r;
4169
4170 r = -EFAULT;
4171 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4172 return r;
4173
4174 return 0;
4175}
4176
4177int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
4178{
4179 int r = 0;
4180
4181 switch (ext) {
4182 case KVM_CAP_IRQCHIP:
4183 case KVM_CAP_HLT:
4184 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
4185 case KVM_CAP_SET_TSS_ADDR:
4186 case KVM_CAP_EXT_CPUID:
4187 case KVM_CAP_EXT_EMUL_CPUID:
4188 case KVM_CAP_CLOCKSOURCE:
4189 case KVM_CAP_PIT:
4190 case KVM_CAP_NOP_IO_DELAY:
4191 case KVM_CAP_MP_STATE:
4192 case KVM_CAP_SYNC_MMU:
4193 case KVM_CAP_USER_NMI:
4194 case KVM_CAP_REINJECT_CONTROL:
4195 case KVM_CAP_IRQ_INJECT_STATUS:
4196 case KVM_CAP_IOEVENTFD:
4197 case KVM_CAP_IOEVENTFD_NO_LENGTH:
4198 case KVM_CAP_PIT2:
4199 case KVM_CAP_PIT_STATE2:
4200 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
4201 case KVM_CAP_VCPU_EVENTS:
4202 case KVM_CAP_HYPERV:
4203 case KVM_CAP_HYPERV_VAPIC:
4204 case KVM_CAP_HYPERV_SPIN:
4205 case KVM_CAP_HYPERV_SYNIC:
4206 case KVM_CAP_HYPERV_SYNIC2:
4207 case KVM_CAP_HYPERV_VP_INDEX:
4208 case KVM_CAP_HYPERV_EVENTFD:
4209 case KVM_CAP_HYPERV_TLBFLUSH:
4210 case KVM_CAP_HYPERV_SEND_IPI:
4211 case KVM_CAP_HYPERV_CPUID:
4212 case KVM_CAP_HYPERV_ENFORCE_CPUID:
4213 case KVM_CAP_SYS_HYPERV_CPUID:
4214 case KVM_CAP_PCI_SEGMENT:
4215 case KVM_CAP_DEBUGREGS:
4216 case KVM_CAP_X86_ROBUST_SINGLESTEP:
4217 case KVM_CAP_XSAVE:
4218 case KVM_CAP_ASYNC_PF:
4219 case KVM_CAP_ASYNC_PF_INT:
4220 case KVM_CAP_GET_TSC_KHZ:
4221 case KVM_CAP_KVMCLOCK_CTRL:
4222 case KVM_CAP_READONLY_MEM:
4223 case KVM_CAP_HYPERV_TIME:
4224 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
4225 case KVM_CAP_TSC_DEADLINE_TIMER:
4226 case KVM_CAP_DISABLE_QUIRKS:
4227 case KVM_CAP_SET_BOOT_CPU_ID:
4228 case KVM_CAP_SPLIT_IRQCHIP:
4229 case KVM_CAP_IMMEDIATE_EXIT:
4230 case KVM_CAP_PMU_EVENT_FILTER:
4231 case KVM_CAP_GET_MSR_FEATURES:
4232 case KVM_CAP_MSR_PLATFORM_INFO:
4233 case KVM_CAP_EXCEPTION_PAYLOAD:
4234 case KVM_CAP_SET_GUEST_DEBUG:
4235 case KVM_CAP_LAST_CPU:
4236 case KVM_CAP_X86_USER_SPACE_MSR:
4237 case KVM_CAP_X86_MSR_FILTER:
4238 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4239#ifdef CONFIG_X86_SGX_KVM
4240 case KVM_CAP_SGX_ATTRIBUTE:
4241#endif
4242 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
4243 case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
4244 case KVM_CAP_SREGS2:
4245 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
4246 case KVM_CAP_VCPU_ATTRIBUTES:
4247 case KVM_CAP_SYS_ATTRIBUTES:
4248 case KVM_CAP_ENABLE_CAP:
4249 r = 1;
4250 break;
4251 case KVM_CAP_EXIT_HYPERCALL:
4252 r = KVM_EXIT_HYPERCALL_VALID_MASK;
4253 break;
4254 case KVM_CAP_SET_GUEST_DEBUG2:
4255 return KVM_GUESTDBG_VALID_MASK;
4256#ifdef CONFIG_KVM_XEN
4257 case KVM_CAP_XEN_HVM:
4258 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
4259 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
4260 KVM_XEN_HVM_CONFIG_SHARED_INFO |
4261 KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL;
4262 if (sched_info_on())
4263 r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
4264 break;
4265#endif
4266 case KVM_CAP_SYNC_REGS:
4267 r = KVM_SYNC_X86_VALID_FIELDS;
4268 break;
4269 case KVM_CAP_ADJUST_CLOCK:
4270 r = KVM_CLOCK_VALID_FLAGS;
4271 break;
4272 case KVM_CAP_X86_DISABLE_EXITS:
4273 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
4274 KVM_X86_DISABLE_EXITS_CSTATE;
4275 if (kvm_can_mwait_in_guest())
4276 r |= KVM_X86_DISABLE_EXITS_MWAIT;
4277 break;
4278 case KVM_CAP_X86_SMM:
/*
 * SMBASE is usually relocated above 1M on modern chipsets, and SMM
 * handlers might indeed rely on 4G segment limits, so do not report
 * SMM to be available if real mode is emulated via vm86 mode.  Still,
 * do not go to great lengths to avoid userspace's usage of the
 * feature, because it is a fringe case that is not enabled except via
 * specific settings of the module parameters.
 */
4287 r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE);
4288 break;
4289 case KVM_CAP_VAPIC:
4290 r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
4291 break;
4292 case KVM_CAP_NR_VCPUS:
4293 r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
4294 break;
4295 case KVM_CAP_MAX_VCPUS:
4296 r = KVM_MAX_VCPUS;
4297 break;
4298 case KVM_CAP_MAX_VCPU_ID:
4299 r = KVM_MAX_VCPU_IDS;
4300 break;
4301 case KVM_CAP_PV_MMU:
4302 r = 0;
4303 break;
4304 case KVM_CAP_MCE:
4305 r = KVM_MAX_MCE_BANKS;
4306 break;
4307 case KVM_CAP_XCRS:
4308 r = boot_cpu_has(X86_FEATURE_XSAVE);
4309 break;
4310 case KVM_CAP_TSC_CONTROL:
4311 r = kvm_has_tsc_control;
4312 break;
4313 case KVM_CAP_X2APIC_API:
4314 r = KVM_X2APIC_API_VALID_FLAGS;
4315 break;
4316 case KVM_CAP_NESTED_STATE:
4317 r = kvm_x86_ops.nested_ops->get_state ?
4318 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
4319 break;
4320 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4321 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
4322 break;
4323 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4324 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
4325 break;
4326 case KVM_CAP_SMALLER_MAXPHYADDR:
4327 r = (int) allow_smaller_maxphyaddr;
4328 break;
4329 case KVM_CAP_STEAL_TIME:
4330 r = sched_info_on();
4331 break;
4332 case KVM_CAP_X86_BUS_LOCK_EXIT:
4333 if (kvm_has_bus_lock_exit)
4334 r = KVM_BUS_LOCK_DETECTION_OFF |
4335 KVM_BUS_LOCK_DETECTION_EXIT;
4336 else
4337 r = 0;
4338 break;
4339 case KVM_CAP_XSAVE2: {
4340 u64 guest_perm = xstate_get_guest_group_perm();
4341
4342 r = xstate_required_size(supported_xcr0 & guest_perm, false);
4343 if (r < sizeof(struct kvm_xsave))
4344 r = sizeof(struct kvm_xsave);
4345 break;
4346 }
4347 default:
4348 break;
4349 }
4350 return r;
4351}
4352
4353static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr)
4354{
4355 void __user *uaddr = (void __user*)(unsigned long)attr->addr;
4356
4357 if ((u64)(unsigned long)uaddr != attr->addr)
4358 return ERR_PTR_USR(-EFAULT);
4359 return uaddr;
4360}
4361
4362static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
4363{
4364 u64 __user *uaddr = kvm_get_attr_addr(attr);
4365
4366 if (attr->group)
4367 return -ENXIO;
4368
4369 if (IS_ERR(uaddr))
4370 return PTR_ERR(uaddr);
4371
4372 switch (attr->attr) {
4373 case KVM_X86_XCOMP_GUEST_SUPP:
4374 if (put_user(supported_xcr0, uaddr))
4375 return -EFAULT;
4376 return 0;
4377 default:
4378 return -ENXIO;
4380 }
4381}
4382
4383static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
4384{
4385 if (attr->group)
4386 return -ENXIO;
4387
4388 switch (attr->attr) {
4389 case KVM_X86_XCOMP_GUEST_SUPP:
4390 return 0;
4391 default:
4392 return -ENXIO;
4393 }
4394}
4395
4396long kvm_arch_dev_ioctl(struct file *filp,
4397 unsigned int ioctl, unsigned long arg)
4398{
4399 void __user *argp = (void __user *)arg;
4400 long r;
4401
4402 switch (ioctl) {
4403 case KVM_GET_MSR_INDEX_LIST: {
4404 struct kvm_msr_list __user *user_msr_list = argp;
4405 struct kvm_msr_list msr_list;
4406 unsigned n;
4407
4408 r = -EFAULT;
4409 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4410 goto out;
4411 n = msr_list.nmsrs;
4412 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
4413 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4414 goto out;
4415 r = -E2BIG;
4416 if (n < msr_list.nmsrs)
4417 goto out;
4418 r = -EFAULT;
4419 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
4420 num_msrs_to_save * sizeof(u32)))
4421 goto out;
4422 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
4423 &emulated_msrs,
4424 num_emulated_msrs * sizeof(u32)))
4425 goto out;
4426 r = 0;
4427 break;
4428 }
4429 case KVM_GET_SUPPORTED_CPUID:
4430 case KVM_GET_EMULATED_CPUID: {
4431 struct kvm_cpuid2 __user *cpuid_arg = argp;
4432 struct kvm_cpuid2 cpuid;
4433
4434 r = -EFAULT;
4435 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4436 goto out;
4437
4438 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
4439 ioctl);
4440 if (r)
4441 goto out;
4442
4443 r = -EFAULT;
4444 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4445 goto out;
4446 r = 0;
4447 break;
4448 }
4449 case KVM_X86_GET_MCE_CAP_SUPPORTED:
4450 r = -EFAULT;
4451 if (copy_to_user(argp, &kvm_mce_cap_supported,
4452 sizeof(kvm_mce_cap_supported)))
4453 goto out;
4454 r = 0;
4455 break;
4456 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
4457 struct kvm_msr_list __user *user_msr_list = argp;
4458 struct kvm_msr_list msr_list;
4459 unsigned int n;
4460
4461 r = -EFAULT;
4462 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
4463 goto out;
4464 n = msr_list.nmsrs;
4465 msr_list.nmsrs = num_msr_based_features;
4466 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
4467 goto out;
4468 r = -E2BIG;
4469 if (n < msr_list.nmsrs)
4470 goto out;
4471 r = -EFAULT;
4472 if (copy_to_user(user_msr_list->indices, &msr_based_features,
4473 num_msr_based_features * sizeof(u32)))
4474 goto out;
4475 r = 0;
4476 break;
4477 }
4478 case KVM_GET_MSRS:
4479 r = msr_io(NULL, argp, do_get_msr_feature, 1);
4480 break;
4481 case KVM_GET_SUPPORTED_HV_CPUID:
4482 r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp);
4483 break;
4484 case KVM_GET_DEVICE_ATTR: {
4485 struct kvm_device_attr attr;
4486 r = -EFAULT;
4487 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
4488 break;
4489 r = kvm_x86_dev_get_attr(&attr);
4490 break;
4491 }
4492 case KVM_HAS_DEVICE_ATTR: {
4493 struct kvm_device_attr attr;
4494 r = -EFAULT;
4495 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
4496 break;
4497 r = kvm_x86_dev_has_attr(&attr);
4498 break;
4499 }
4500 default:
4501 r = -EINVAL;
4502 break;
4503 }
4504out:
4505 return r;
4506}
4507
4508static void wbinvd_ipi(void *garbage)
4509{
4510 wbinvd();
4511}
4512
4513static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
4514{
4515 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
4516}
4517
4518void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4519{
4520
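	/*
	 * WBINVD issued by the guest only matters when non-coherent DMA is
	 * assigned to the VM: either track this pCPU for a later flush, or
	 * flush the previous pCPU's caches via IPI right away.
	 */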
4521 if (need_emulate_wbinvd(vcpu)) {
4522 if (static_call(kvm_x86_has_wbinvd_exit)())
4523 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4524 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
4525 smp_call_function_single(vcpu->cpu,
4526 wbinvd_ipi, NULL, 1);
4527 }
4528
4529 static_call(kvm_x86_vcpu_load)(vcpu, cpu);
4530
4531
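	/* Save the host PKRU; it is restored when host state is reloaded. */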
4532 vcpu->arch.host_pkru = read_pkru();
4533
4534
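	/* Fold in any TSC adjustment detected while this vCPU was not loaded. */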
4535 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
4536 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
4537 vcpu->arch.tsc_offset_adjustment = 0;
4538 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4539 }
4540
4541 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
4542 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
4543 rdtsc() - vcpu->arch.last_host_tsc;
4544 if (tsc_delta < 0)
4545 mark_tsc_unstable("KVM discovered backwards TSC");
4546
4547 if (kvm_check_tsc_unstable()) {
4548 u64 offset = kvm_compute_l1_tsc_offset(vcpu,
4549 vcpu->arch.last_guest_tsc);
4550 kvm_vcpu_write_tsc_offset(vcpu, offset);
4551 vcpu->arch.tsc_catchup = 1;
4552 }
4553
4554 if (kvm_lapic_hv_timer_in_use(vcpu))
4555 kvm_lapic_restart_hv_timer(vcpu);
4556
4557
4558
4559
4560
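		/*
		 * When a reliable master clock is in use, kvmclock does not
		 * need a global update just because the vCPU moved to a new
		 * physical CPU.
		 */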
4561 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
4562 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
4563 if (vcpu->cpu != cpu)
4564 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
4565 vcpu->cpu = cpu;
4566 }
4567
4568 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
4569}
4570
4571static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
4572{
4573 struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
4574 struct kvm_steal_time __user *st;
4575 struct kvm_memslots *slots;
4576 static const u8 preempted = KVM_VCPU_PREEMPTED;
4577
4578 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
4579 return;
4580
4581 if (vcpu->arch.st.preempted)
4582 return;
4583
4584
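	/* Not running in the VMM's mm (e.g. process exit): don't touch guest memory. */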
4585 if (unlikely(current->mm != vcpu->kvm->mm))
4586 return;
4587
4588 slots = kvm_memslots(vcpu->kvm);
4589
4590 if (unlikely(slots->generation != ghc->generation ||
4591 kvm_is_error_hva(ghc->hva) || !ghc->memslot))
4592 return;
4593
4594 st = (struct kvm_steal_time __user *)ghc->hva;
4595 BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
4596
4597 if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
4598 vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
4599
4600 mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
4601}
4602
4603void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
4604{
4605 int idx;
4606
4607 if (vcpu->preempted && !vcpu->arch.guest_state_protected)
4608 vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
4609
4610
4611
4612
4613
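	/*
	 * Take kvm->srcu: recording the preempted flag goes through caches
	 * that are validated against the current memslot generation.
	 */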
4614 idx = srcu_read_lock(&vcpu->kvm->srcu);
4615 if (kvm_xen_msr_enabled(vcpu->kvm))
4616 kvm_xen_runstate_set_preempted(vcpu);
4617 else
4618 kvm_steal_time_set_preempted(vcpu);
4619 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4620
4621 static_call(kvm_x86_vcpu_put)(vcpu);
4622 vcpu->arch.last_host_tsc = rdtsc();
4623}
4624
4625static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
4626 struct kvm_lapic_state *s)
4627{
4628 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
4629
4630 return kvm_apic_get_state(vcpu, s);
4631}
4632
4633static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
4634 struct kvm_lapic_state *s)
4635{
4636 int r;
4637
4638 r = kvm_apic_set_state(vcpu, s);
4639 if (r)
4640 return r;
4641 update_cr8_intercept(vcpu);
4642
4643 return 0;
4644}
4645
4646static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
4647{
4648
4649
4650
4651
4652
4653
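	/* A pending ExtINT must be delivered first; don't accept another interrupt from userspace. */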
4654 if (kvm_cpu_has_extint(vcpu))
4655 return false;
4656
4657
4658 return (!lapic_in_kernel(vcpu) ||
4659 kvm_apic_accept_pic_intr(vcpu));
4660}
4661
4662static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
4663{
4664
4665
4666
4667
4668
4669
4670
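	/*
	 * Only report readiness when interrupts are architecturally allowed,
	 * the interrupt can actually be accepted, and no exception or
	 * reinjected event is still outstanding.
	 */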
4671 return (kvm_arch_interrupt_allowed(vcpu) &&
4672 kvm_cpu_accept_dm_intr(vcpu) &&
4673 !kvm_event_needs_reinjection(vcpu) &&
4674 !vcpu->arch.exception.pending);
4675}
4676
4677static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
4678 struct kvm_interrupt *irq)
4679{
4680 if (irq->irq >= KVM_NR_INTERRUPTS)
4681 return -EINVAL;
4682
4683 if (!irqchip_in_kernel(vcpu->kvm)) {
4684 kvm_queue_interrupt(vcpu, irq->irq, false);
4685 kvm_make_request(KVM_REQ_EVENT, vcpu);
4686 return 0;
4687 }
4688
4689
4690
4691
4692
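	/*
	 * With an in-kernel LAPIC, KVM_INTERRUPT is only used to deliver
	 * ExtINT, which conflicts with an in-kernel PIC.
	 */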
4693 if (pic_in_kernel(vcpu->kvm))
4694 return -ENXIO;
4695
4696 if (vcpu->arch.pending_external_vector != -1)
4697 return -EEXIST;
4698
4699 vcpu->arch.pending_external_vector = irq->irq;
4700 kvm_make_request(KVM_REQ_EVENT, vcpu);
4701 return 0;
4702}
4703
4704static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
4705{
4706 kvm_inject_nmi(vcpu);
4707
4708 return 0;
4709}
4710
4711static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
4712{
4713 kvm_make_request(KVM_REQ_SMI, vcpu);
4714
4715 return 0;
4716}
4717
4718static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
4719 struct kvm_tpr_access_ctl *tac)
4720{
4721 if (tac->flags)
4722 return -EINVAL;
4723 vcpu->arch.tpr_access_reporting = !!tac->enabled;
4724 return 0;
4725}
4726
4727static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
4728 u64 mcg_cap)
4729{
4730 int r;
4731 unsigned bank_num = mcg_cap & 0xff, bank;
4732
4733 r = -EINVAL;
4734 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
4735 goto out;
4736 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
4737 goto out;
4738 r = 0;
4739 vcpu->arch.mcg_cap = mcg_cap;
4740
4741 if (mcg_cap & MCG_CTL_P)
4742 vcpu->arch.mcg_ctl = ~(u64)0;
4743
4744 for (bank = 0; bank < bank_num; bank++)
4745 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
4746
4747 static_call(kvm_x86_setup_mce)(vcpu);
4748out:
4749 return r;
4750}
4751
4752static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
4753 struct kvm_x86_mce *mce)
4754{
4755 u64 mcg_cap = vcpu->arch.mcg_cap;
4756 unsigned bank_num = mcg_cap & 0xff;
4757 u64 *banks = vcpu->arch.mce_banks;
4758
4759 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
4760 return -EINVAL;
4761
4762
4763
4764
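	/* If IA32_MCG_CTL is not all 1s, uncorrected error reporting is disabled. */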
4765 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
4766 vcpu->arch.mcg_ctl != ~(u64)0)
4767 return 0;
4768 banks += 4 * mce->bank;
4769
4770
4771
4772
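	/* If IA32_MCi_CTL is not all 1s, uncorrected error reporting is disabled for this bank. */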
4773 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
4774 return 0;
4775 if (mce->status & MCI_STATUS_UC) {
4776 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
4777 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
4778 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
4779 return 0;
4780 }
4781 if (banks[1] & MCI_STATUS_VAL)
4782 mce->status |= MCI_STATUS_OVER;
4783 banks[2] = mce->addr;
4784 banks[3] = mce->misc;
4785 vcpu->arch.mcg_status = mce->mcg_status;
4786 banks[1] = mce->status;
4787 kvm_queue_exception(vcpu, MC_VECTOR);
4788 } else if (!(banks[1] & MCI_STATUS_VAL)
4789 || !(banks[1] & MCI_STATUS_UC)) {
4790 if (banks[1] & MCI_STATUS_VAL)
4791 mce->status |= MCI_STATUS_OVER;
4792 banks[2] = mce->addr;
4793 banks[3] = mce->misc;
4794 banks[1] = mce->status;
4795 } else
4796 banks[1] |= MCI_STATUS_OVER;
4797 return 0;
4798}
4799
4800static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
4801 struct kvm_vcpu_events *events)
4802{
4803 process_nmi(vcpu);
4804
4805 if (kvm_check_request(KVM_REQ_SMI, vcpu))
4806 process_smi(vcpu);
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819 if (!vcpu->kvm->arch.exception_payload_enabled &&
4820 vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
4821 kvm_deliver_exception_payload(vcpu);
4822
4823
4824
4825
4826
4827
4828
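	/*
	 * The ABI cannot convey the instruction length of software
	 * exceptions, so don't report them; since RIP was not advanced the
	 * guest will simply hit the exception again.
	 */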
4829 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
4830 events->exception.injected = 0;
4831 events->exception.pending = 0;
4832 } else {
4833 events->exception.injected = vcpu->arch.exception.injected;
4834 events->exception.pending = vcpu->arch.exception.pending;
4835
4836
4837
4838
4839
4840 if (!vcpu->kvm->arch.exception_payload_enabled)
4841 events->exception.injected |=
4842 vcpu->arch.exception.pending;
4843 }
4844 events->exception.nr = vcpu->arch.exception.nr;
4845 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
4846 events->exception.error_code = vcpu->arch.exception.error_code;
4847 events->exception_has_payload = vcpu->arch.exception.has_payload;
4848 events->exception_payload = vcpu->arch.exception.payload;
4849
4850 events->interrupt.injected =
4851 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
4852 events->interrupt.nr = vcpu->arch.interrupt.nr;
4853 events->interrupt.soft = 0;
4854 events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
4855
4856 events->nmi.injected = vcpu->arch.nmi_injected;
4857 events->nmi.pending = vcpu->arch.nmi_pending != 0;
4858 events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
4859 events->nmi.pad = 0;
4860
4861 events->sipi_vector = 0;
4862
4863 events->smi.smm = is_smm(vcpu);
4864 events->smi.pending = vcpu->arch.smi_pending;
4865 events->smi.smm_inside_nmi =
4866 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
4867 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
4868
4869 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
4870 | KVM_VCPUEVENT_VALID_SHADOW
4871 | KVM_VCPUEVENT_VALID_SMM);
4872 if (vcpu->kvm->arch.exception_payload_enabled)
4873 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
4874
4875 memset(&events->reserved, 0, sizeof(events->reserved));
4876}
4877
4878static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
4879
4880static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
4881 struct kvm_vcpu_events *events)
4882{
4883 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
4884 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
4885 | KVM_VCPUEVENT_VALID_SHADOW
4886 | KVM_VCPUEVENT_VALID_SMM
4887 | KVM_VCPUEVENT_VALID_PAYLOAD))
4888 return -EINVAL;
4889
4890 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
4891 if (!vcpu->kvm->arch.exception_payload_enabled)
4892 return -EINVAL;
4893 if (events->exception.pending)
4894 events->exception.injected = 0;
4895 else
4896 events->exception_has_payload = 0;
4897 } else {
4898 events->exception.pending = 0;
4899 events->exception_has_payload = 0;
4900 }
4901
4902 if ((events->exception.injected || events->exception.pending) &&
4903 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
4904 return -EINVAL;
4905
4906
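	/* INIT is latched while in SMM, so this combination is inconsistent. */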
4907 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
4908 (events->smi.smm || events->smi.pending) &&
4909 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4910 return -EINVAL;
4911
4912 process_nmi(vcpu);
4913 vcpu->arch.exception.injected = events->exception.injected;
4914 vcpu->arch.exception.pending = events->exception.pending;
4915 vcpu->arch.exception.nr = events->exception.nr;
4916 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
4917 vcpu->arch.exception.error_code = events->exception.error_code;
4918 vcpu->arch.exception.has_payload = events->exception_has_payload;
4919 vcpu->arch.exception.payload = events->exception_payload;
4920
4921 vcpu->arch.interrupt.injected = events->interrupt.injected;
4922 vcpu->arch.interrupt.nr = events->interrupt.nr;
4923 vcpu->arch.interrupt.soft = events->interrupt.soft;
4924 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
4925 static_call(kvm_x86_set_interrupt_shadow)(vcpu,
4926 events->interrupt.shadow);
4927
4928 vcpu->arch.nmi_injected = events->nmi.injected;
4929 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
4930 vcpu->arch.nmi_pending = events->nmi.pending;
4931 static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
4932
4933 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
4934 lapic_in_kernel(vcpu))
4935 vcpu->arch.apic->sipi_vector = events->sipi_vector;
4936
4937 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
4938 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
4939 kvm_x86_ops.nested_ops->leave_nested(vcpu);
4940 kvm_smm_changed(vcpu, events->smi.smm);
4941 }
4942
4943 vcpu->arch.smi_pending = events->smi.pending;
4944
4945 if (events->smi.smm) {
4946 if (events->smi.smm_inside_nmi)
4947 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
4948 else
4949 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
4950 }
4951
4952 if (lapic_in_kernel(vcpu)) {
4953 if (events->smi.latched_init)
4954 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4955 else
4956 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4957 }
4958 }
4959
4960 kvm_make_request(KVM_REQ_EVENT, vcpu);
4961
4962 return 0;
4963}
4964
4965static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
4966 struct kvm_debugregs *dbgregs)
4967{
4968 unsigned long val;
4969
4970 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
4971 kvm_get_dr(vcpu, 6, &val);
4972 dbgregs->dr6 = val;
4973 dbgregs->dr7 = vcpu->arch.dr7;
4974 dbgregs->flags = 0;
4975 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
4976}
4977
4978static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
4979 struct kvm_debugregs *dbgregs)
4980{
4981 if (dbgregs->flags)
4982 return -EINVAL;
4983
4984 if (!kvm_dr6_valid(dbgregs->dr6))
4985 return -EINVAL;
4986 if (!kvm_dr7_valid(dbgregs->dr7))
4987 return -EINVAL;
4988
4989 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
4990 kvm_update_dr0123(vcpu);
4991 vcpu->arch.dr6 = dbgregs->dr6;
4992 vcpu->arch.dr7 = dbgregs->dr7;
4993 kvm_update_dr7(vcpu);
4994
4995 return 0;
4996}
4997
4998static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
4999 struct kvm_xsave *guest_xsave)
5000{
5001 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
5002 return;
5003
5004 fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
5005 guest_xsave->region,
5006 sizeof(guest_xsave->region),
5007 vcpu->arch.pkru);
5008}
5009
5010static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu,
5011 u8 *state, unsigned int size)
5012{
5013 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
5014 return;
5015
5016 fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
5017 state, size, vcpu->arch.pkru);
5018}
5019
5020static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
5021 struct kvm_xsave *guest_xsave)
5022{
5023 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
5024 return 0;
5025
5026 return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
5027 guest_xsave->region,
5028 supported_xcr0, &vcpu->arch.pkru);
5029}
5030
5031static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
5032 struct kvm_xcrs *guest_xcrs)
5033{
5034 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
5035 guest_xcrs->nr_xcrs = 0;
5036 return;
5037 }
5038
5039 guest_xcrs->nr_xcrs = 1;
5040 guest_xcrs->flags = 0;
5041 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
5042 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
5043}
5044
5045static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
5046 struct kvm_xcrs *guest_xcrs)
5047{
5048 int i, r = 0;
5049
5050 if (!boot_cpu_has(X86_FEATURE_XSAVE))
5051 return -EINVAL;
5052
5053 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
5054 return -EINVAL;
5055
5056 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
5057
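		/* Only XCR0 is currently supported. */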
5058 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
5059 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
5060 guest_xcrs->xcrs[i].value);
5061 break;
5062 }
5063 if (r)
5064 r = -EINVAL;
5065 return r;
5066}
5067
5068
5069
5070
5071
5072
5073
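/*
 * kvm_set_guest_paused() tells the guest kernel, via a pvclock flag update,
 * that it has been stopped by the hypervisor; returns -EINVAL if the guest
 * has not enabled kvmclock.
 */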
5074static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
5075{
5076 if (!vcpu->arch.pv_time_enabled)
5077 return -EINVAL;
5078 vcpu->arch.pvclock_set_guest_stopped_request = true;
5079 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5080 return 0;
5081}
5082
5083static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu,
5084 struct kvm_device_attr *attr)
5085{
5086 int r;
5087
5088 switch (attr->attr) {
5089 case KVM_VCPU_TSC_OFFSET:
5090 r = 0;
5091 break;
5092 default:
5093 r = -ENXIO;
5094 }
5095
5096 return r;
5097}
5098
5099static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu,
5100 struct kvm_device_attr *attr)
5101{
5102 u64 __user *uaddr = kvm_get_attr_addr(attr);
5103 int r;
5104
5105 if (IS_ERR(uaddr))
5106 return PTR_ERR(uaddr);
5107
5108 switch (attr->attr) {
5109 case KVM_VCPU_TSC_OFFSET:
5110 r = -EFAULT;
5111 if (put_user(vcpu->arch.l1_tsc_offset, uaddr))
5112 break;
5113 r = 0;
5114 break;
5115 default:
5116 r = -ENXIO;
5117 }
5118
5119 return r;
5120}
5121
5122static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu,
5123 struct kvm_device_attr *attr)
5124{
5125 u64 __user *uaddr = kvm_get_attr_addr(attr);
5126 struct kvm *kvm = vcpu->kvm;
5127 int r;
5128
5129 if (IS_ERR(uaddr))
5130 return PTR_ERR(uaddr);
5131
5132 switch (attr->attr) {
5133 case KVM_VCPU_TSC_OFFSET: {
5134 u64 offset, tsc, ns;
5135 unsigned long flags;
5136 bool matched;
5137
5138 r = -EFAULT;
5139 if (get_user(offset, uaddr))
5140 break;
5141
5142 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
5143
5144 matched = (vcpu->arch.virtual_tsc_khz &&
5145 kvm->arch.last_tsc_khz == vcpu->arch.virtual_tsc_khz &&
5146 kvm->arch.last_tsc_offset == offset);
5147
5148 tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio) + offset;
5149 ns = get_kvmclock_base_ns();
5150
5151 __kvm_synchronize_tsc(vcpu, offset, tsc, ns, matched);
5152 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
5153
5154 r = 0;
5155 break;
5156 }
5157 default:
5158 r = -ENXIO;
5159 }
5160
5161 return r;
5162}
5163
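/*
 * Dispatch the vCPU-scoped device-attribute ioctls; only the TSC control
 * group is handled here.  Illustrative sketch (not part of this file) of how
 * userspace might read the current L1 TSC offset, assuming vcpu_fd came from
 * KVM_CREATE_VCPU:
 *
 *	__u64 offset;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_VCPU_TSC_CTRL,
 *		.attr  = KVM_VCPU_TSC_OFFSET,
 *		.addr  = (__u64)(unsigned long)&offset,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
 */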
5164static int kvm_vcpu_ioctl_device_attr(struct kvm_vcpu *vcpu,
5165 unsigned int ioctl,
5166 void __user *argp)
5167{
5168 struct kvm_device_attr attr;
5169 int r;
5170
5171 if (copy_from_user(&attr, argp, sizeof(attr)))
5172 return -EFAULT;
5173
5174 if (attr.group != KVM_VCPU_TSC_CTRL)
5175 return -ENXIO;
5176
5177 switch (ioctl) {
5178 case KVM_HAS_DEVICE_ATTR:
5179 r = kvm_arch_tsc_has_attr(vcpu, &attr);
5180 break;
5181 case KVM_GET_DEVICE_ATTR:
5182 r = kvm_arch_tsc_get_attr(vcpu, &attr);
5183 break;
5184 case KVM_SET_DEVICE_ATTR:
5185 r = kvm_arch_tsc_set_attr(vcpu, &attr);
5186 break;
5187 }
5188
5189 return r;
5190}
5191
5192static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5193 struct kvm_enable_cap *cap)
5194{
5195 int r;
5196 uint16_t vmcs_version;
5197 void __user *user_ptr;
5198
5199 if (cap->flags)
5200 return -EINVAL;
5201
5202 switch (cap->cap) {
5203 case KVM_CAP_HYPERV_SYNIC2:
5204 if (cap->args[0])
5205 return -EINVAL;
5206 fallthrough;
5207
5208 case KVM_CAP_HYPERV_SYNIC:
5209 if (!irqchip_in_kernel(vcpu->kvm))
5210 return -EINVAL;
5211 return kvm_hv_activate_synic(vcpu, cap->cap ==
5212 KVM_CAP_HYPERV_SYNIC2);
5213 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
5214 if (!kvm_x86_ops.nested_ops->enable_evmcs)
5215 return -ENOTTY;
5216 r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
5217 if (!r) {
5218 user_ptr = (void __user *)(uintptr_t)cap->args[0];
5219 if (copy_to_user(user_ptr, &vmcs_version,
5220 sizeof(vmcs_version)))
5221 r = -EFAULT;
5222 }
5223 return r;
5224 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
5225 if (!kvm_x86_ops.enable_direct_tlbflush)
5226 return -ENOTTY;
5227
5228 return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
5229
5230 case KVM_CAP_HYPERV_ENFORCE_CPUID:
5231 return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
5232
5233 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
5234 vcpu->arch.pv_cpuid.enforce = cap->args[0];
5235 if (vcpu->arch.pv_cpuid.enforce)
5236 kvm_update_pv_runtime(vcpu);
5237
5238 return 0;
5239 default:
5240 return -EINVAL;
5241 }
5242}
5243
5244long kvm_arch_vcpu_ioctl(struct file *filp,
5245 unsigned int ioctl, unsigned long arg)
5246{
5247 struct kvm_vcpu *vcpu = filp->private_data;
5248 void __user *argp = (void __user *)arg;
5249 int r;
5250 union {
5251 struct kvm_sregs2 *sregs2;
5252 struct kvm_lapic_state *lapic;
5253 struct kvm_xsave *xsave;
5254 struct kvm_xcrs *xcrs;
5255 void *buffer;
5256 } u;
5257
5258 vcpu_load(vcpu);
5259
5260 u.buffer = NULL;
5261 switch (ioctl) {
5262 case KVM_GET_LAPIC: {
5263 r = -EINVAL;
5264 if (!lapic_in_kernel(vcpu))
5265 goto out;
5266 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
5267 GFP_KERNEL_ACCOUNT);
5268
5269 r = -ENOMEM;
5270 if (!u.lapic)
5271 goto out;
5272 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
5273 if (r)
5274 goto out;
5275 r = -EFAULT;
5276 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
5277 goto out;
5278 r = 0;
5279 break;
5280 }
5281 case KVM_SET_LAPIC: {
5282 r = -EINVAL;
5283 if (!lapic_in_kernel(vcpu))
5284 goto out;
5285 u.lapic = memdup_user(argp, sizeof(*u.lapic));
5286 if (IS_ERR(u.lapic)) {
5287 r = PTR_ERR(u.lapic);
5288 goto out_nofree;
5289 }
5290
5291 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
5292 break;
5293 }
5294 case KVM_INTERRUPT: {
5295 struct kvm_interrupt irq;
5296
5297 r = -EFAULT;
5298 if (copy_from_user(&irq, argp, sizeof(irq)))
5299 goto out;
5300 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
5301 break;
5302 }
5303 case KVM_NMI: {
5304 r = kvm_vcpu_ioctl_nmi(vcpu);
5305 break;
5306 }
5307 case KVM_SMI: {
5308 r = kvm_vcpu_ioctl_smi(vcpu);
5309 break;
5310 }
5311 case KVM_SET_CPUID: {
5312 struct kvm_cpuid __user *cpuid_arg = argp;
5313 struct kvm_cpuid cpuid;
5314
5315 r = -EFAULT;
5316 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5317 goto out;
5318 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
5319 break;
5320 }
5321 case KVM_SET_CPUID2: {
5322 struct kvm_cpuid2 __user *cpuid_arg = argp;
5323 struct kvm_cpuid2 cpuid;
5324
5325 r = -EFAULT;
5326 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5327 goto out;
5328 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
5329 cpuid_arg->entries);
5330 break;
5331 }
5332 case KVM_GET_CPUID2: {
5333 struct kvm_cpuid2 __user *cpuid_arg = argp;
5334 struct kvm_cpuid2 cpuid;
5335
5336 r = -EFAULT;
5337 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
5338 goto out;
5339 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
5340 cpuid_arg->entries);
5341 if (r)
5342 goto out;
5343 r = -EFAULT;
5344 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
5345 goto out;
5346 r = 0;
5347 break;
5348 }
5349 case KVM_GET_MSRS: {
5350 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5351 r = msr_io(vcpu, argp, do_get_msr, 1);
5352 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5353 break;
5354 }
5355 case KVM_SET_MSRS: {
5356 int idx = srcu_read_lock(&vcpu->kvm->srcu);
5357 r = msr_io(vcpu, argp, do_set_msr, 0);
5358 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5359 break;
5360 }
5361 case KVM_TPR_ACCESS_REPORTING: {
5362 struct kvm_tpr_access_ctl tac;
5363
5364 r = -EFAULT;
5365 if (copy_from_user(&tac, argp, sizeof(tac)))
5366 goto out;
5367 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
5368 if (r)
5369 goto out;
5370 r = -EFAULT;
5371 if (copy_to_user(argp, &tac, sizeof(tac)))
5372 goto out;
5373 r = 0;
5374 break;
5375	}
5376 case KVM_SET_VAPIC_ADDR: {
5377 struct kvm_vapic_addr va;
5378 int idx;
5379
5380 r = -EINVAL;
5381 if (!lapic_in_kernel(vcpu))
5382 goto out;
5383 r = -EFAULT;
5384 if (copy_from_user(&va, argp, sizeof(va)))
5385 goto out;
5386 idx = srcu_read_lock(&vcpu->kvm->srcu);
5387 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
5388 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5389 break;
5390 }
5391 case KVM_X86_SETUP_MCE: {
5392 u64 mcg_cap;
5393
5394 r = -EFAULT;
5395 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
5396 goto out;
5397 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
5398 break;
5399 }
5400 case KVM_X86_SET_MCE: {
5401 struct kvm_x86_mce mce;
5402
5403 r = -EFAULT;
5404 if (copy_from_user(&mce, argp, sizeof(mce)))
5405 goto out;
5406 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
5407 break;
5408 }
5409 case KVM_GET_VCPU_EVENTS: {
5410 struct kvm_vcpu_events events;
5411
5412 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
5413
5414 r = -EFAULT;
5415 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
5416 break;
5417 r = 0;
5418 break;
5419 }
5420 case KVM_SET_VCPU_EVENTS: {
5421 struct kvm_vcpu_events events;
5422
5423 r = -EFAULT;
5424 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
5425 break;
5426
5427 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
5428 break;
5429 }
5430 case KVM_GET_DEBUGREGS: {
5431 struct kvm_debugregs dbgregs;
5432
5433 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
5434
5435 r = -EFAULT;
5436 if (copy_to_user(argp, &dbgregs,
5437 sizeof(struct kvm_debugregs)))
5438 break;
5439 r = 0;
5440 break;
5441 }
5442 case KVM_SET_DEBUGREGS: {
5443 struct kvm_debugregs dbgregs;
5444
5445 r = -EFAULT;
5446 if (copy_from_user(&dbgregs, argp,
5447 sizeof(struct kvm_debugregs)))
5448 break;
5449
5450 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
5451 break;
5452 }
5453 case KVM_GET_XSAVE: {
5454 r = -EINVAL;
5455 if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
5456 break;
5457
5458 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
5459 r = -ENOMEM;
5460 if (!u.xsave)
5461 break;
5462
5463 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
5464
5465 r = -EFAULT;
5466 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
5467 break;
5468 r = 0;
5469 break;
5470 }
5471 case KVM_SET_XSAVE: {
5472 int size = vcpu->arch.guest_fpu.uabi_size;
5473
5474 u.xsave = memdup_user(argp, size);
5475 if (IS_ERR(u.xsave)) {
5476 r = PTR_ERR(u.xsave);
5477 goto out_nofree;
5478 }
5479
5480 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
5481 break;
5482 }
5483
5484 case KVM_GET_XSAVE2: {
5485 int size = vcpu->arch.guest_fpu.uabi_size;
5486
5487 u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
5488 r = -ENOMEM;
5489 if (!u.xsave)
5490 break;
5491
5492 kvm_vcpu_ioctl_x86_get_xsave2(vcpu, u.buffer, size);
5493
5494 r = -EFAULT;
5495 if (copy_to_user(argp, u.xsave, size))
5496 break;
5497
5498 r = 0;
5499 break;
5500 }
5501
5502 case KVM_GET_XCRS: {
5503 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
5504 r = -ENOMEM;
5505 if (!u.xcrs)
5506 break;
5507
5508 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
5509
5510 r = -EFAULT;
5511 if (copy_to_user(argp, u.xcrs,
5512 sizeof(struct kvm_xcrs)))
5513 break;
5514 r = 0;
5515 break;
5516 }
5517 case KVM_SET_XCRS: {
5518 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
5519 if (IS_ERR(u.xcrs)) {
5520 r = PTR_ERR(u.xcrs);
5521 goto out_nofree;
5522 }
5523
5524 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
5525 break;
5526 }
5527 case KVM_SET_TSC_KHZ: {
5528 u32 user_tsc_khz;
5529
5530 r = -EINVAL;
5531 user_tsc_khz = (u32)arg;
5532
5533 if (kvm_has_tsc_control &&
5534 user_tsc_khz >= kvm_max_guest_tsc_khz)
5535 goto out;
5536
5537 if (user_tsc_khz == 0)
5538 user_tsc_khz = tsc_khz;
5539
5540 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
5541 r = 0;
5542
5543 goto out;
5544 }
5545 case KVM_GET_TSC_KHZ: {
5546 r = vcpu->arch.virtual_tsc_khz;
5547 goto out;
5548 }
5549 case KVM_KVMCLOCK_CTRL: {
5550 r = kvm_set_guest_paused(vcpu);
5551 goto out;
5552 }
5553 case KVM_ENABLE_CAP: {
5554 struct kvm_enable_cap cap;
5555
5556 r = -EFAULT;
5557 if (copy_from_user(&cap, argp, sizeof(cap)))
5558 goto out;
5559 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5560 break;
5561 }
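	/*
	 * KVM_GET_NESTED_STATE is a two-step protocol: when the buffer is too
	 * small, the required size is written back and -E2BIG is returned so
	 * userspace can retry with a large enough buffer.
	 */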
5562 case KVM_GET_NESTED_STATE: {
5563 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5564 u32 user_data_size;
5565
5566 r = -EINVAL;
5567 if (!kvm_x86_ops.nested_ops->get_state)
5568 break;
5569
5570 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
5571 r = -EFAULT;
5572 if (get_user(user_data_size, &user_kvm_nested_state->size))
5573 break;
5574
5575 r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
5576 user_data_size);
5577 if (r < 0)
5578 break;
5579
5580 if (r > user_data_size) {
5581 if (put_user(r, &user_kvm_nested_state->size))
5582 r = -EFAULT;
5583 else
5584 r = -E2BIG;
5585 break;
5586 }
5587
5588 r = 0;
5589 break;
5590 }
5591 case KVM_SET_NESTED_STATE: {
5592 struct kvm_nested_state __user *user_kvm_nested_state = argp;
5593 struct kvm_nested_state kvm_state;
5594 int idx;
5595
5596 r = -EINVAL;
5597 if (!kvm_x86_ops.nested_ops->set_state)
5598 break;
5599
5600 r = -EFAULT;
5601 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
5602 break;
5603
5604 r = -EINVAL;
5605 if (kvm_state.size < sizeof(kvm_state))
5606 break;
5607
5608 if (kvm_state.flags &
5609 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
5610 | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
5611 | KVM_STATE_NESTED_GIF_SET))
5612 break;
5613
5614
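		/* nested_run_pending implies guest mode. */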
5615 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
5616 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
5617 break;
5618
5619 idx = srcu_read_lock(&vcpu->kvm->srcu);
5620 r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
5621 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5622 break;
5623 }
5624 case KVM_GET_SUPPORTED_HV_CPUID:
5625 r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
5626 break;
5627#ifdef CONFIG_KVM_XEN
5628 case KVM_XEN_VCPU_GET_ATTR: {
5629 struct kvm_xen_vcpu_attr xva;
5630
5631 r = -EFAULT;
5632 if (copy_from_user(&xva, argp, sizeof(xva)))
5633 goto out;
5634 r = kvm_xen_vcpu_get_attr(vcpu, &xva);
5635 if (!r && copy_to_user(argp, &xva, sizeof(xva)))
5636 r = -EFAULT;
5637 break;
5638 }
5639 case KVM_XEN_VCPU_SET_ATTR: {
5640 struct kvm_xen_vcpu_attr xva;
5641
5642 r = -EFAULT;
5643 if (copy_from_user(&xva, argp, sizeof(xva)))
5644 goto out;
5645 r = kvm_xen_vcpu_set_attr(vcpu, &xva);
5646 break;
5647 }
5648#endif
5649 case KVM_GET_SREGS2: {
5650 u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
5651 r = -ENOMEM;
5652 if (!u.sregs2)
5653 goto out;
5654 __get_sregs2(vcpu, u.sregs2);
5655 r = -EFAULT;
5656 if (copy_to_user(argp, u.sregs2, sizeof(struct kvm_sregs2)))
5657 goto out;
5658 r = 0;
5659 break;
5660 }
5661 case KVM_SET_SREGS2: {
5662 u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
5663 if (IS_ERR(u.sregs2)) {
5664 r = PTR_ERR(u.sregs2);
5665 u.sregs2 = NULL;
5666 goto out;
5667 }
5668 r = __set_sregs2(vcpu, u.sregs2);
5669 break;
5670 }
5671 case KVM_HAS_DEVICE_ATTR:
5672 case KVM_GET_DEVICE_ATTR:
5673 case KVM_SET_DEVICE_ATTR:
5674 r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp);
5675 break;
5676 default:
5677 r = -EINVAL;
5678 }
5679out:
5680 kfree(u.buffer);
5681out_nofree:
5682 vcpu_put(vcpu);
5683 return r;
5684}
5685
5686vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5687{
5688 return VM_FAULT_SIGBUS;
5689}
5690
5691static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
5692{
5693 int ret;
5694
5695 if (addr > (unsigned int)(-3 * PAGE_SIZE))
5696 return -EINVAL;
5697 ret = static_call(kvm_x86_set_tss_addr)(kvm, addr);
5698 return ret;
5699}
5700
5701static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
5702 u64 ident_addr)
5703{
5704 return static_call(kvm_x86_set_identity_map_addr)(kvm, ident_addr);
5705}
5706
5707static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
5708 unsigned long kvm_nr_mmu_pages)
5709{
5710 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
5711 return -EINVAL;
5712
5713 mutex_lock(&kvm->slots_lock);
5714
5715 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
5716 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
5717
5718 mutex_unlock(&kvm->slots_lock);
5719 return 0;
5720}
5721
5722static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
5723{
5724 return kvm->arch.n_max_mmu_pages;
5725}
5726
5727static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5728{
5729 struct kvm_pic *pic = kvm->arch.vpic;
5730 int r;
5731
5732 r = 0;
5733 switch (chip->chip_id) {
5734 case KVM_IRQCHIP_PIC_MASTER:
5735 memcpy(&chip->chip.pic, &pic->pics[0],
5736 sizeof(struct kvm_pic_state));
5737 break;
5738 case KVM_IRQCHIP_PIC_SLAVE:
5739 memcpy(&chip->chip.pic, &pic->pics[1],
5740 sizeof(struct kvm_pic_state));
5741 break;
5742 case KVM_IRQCHIP_IOAPIC:
5743 kvm_get_ioapic(kvm, &chip->chip.ioapic);
5744 break;
5745 default:
5746 r = -EINVAL;
5747 break;
5748 }
5749 return r;
5750}
5751
5752static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5753{
5754 struct kvm_pic *pic = kvm->arch.vpic;
5755 int r;
5756
5757 r = 0;
5758 switch (chip->chip_id) {
5759 case KVM_IRQCHIP_PIC_MASTER:
5760 spin_lock(&pic->lock);
5761 memcpy(&pic->pics[0], &chip->chip.pic,
5762 sizeof(struct kvm_pic_state));
5763 spin_unlock(&pic->lock);
5764 break;
5765 case KVM_IRQCHIP_PIC_SLAVE:
5766 spin_lock(&pic->lock);
5767 memcpy(&pic->pics[1], &chip->chip.pic,
5768 sizeof(struct kvm_pic_state));
5769 spin_unlock(&pic->lock);
5770 break;
5771 case KVM_IRQCHIP_IOAPIC:
5772 kvm_set_ioapic(kvm, &chip->chip.ioapic);
5773 break;
5774 default:
5775 r = -EINVAL;
5776 break;
5777 }
5778 kvm_pic_update_irq(pic);
5779 return r;
5780}
5781
5782static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5783{
5784 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
5785
5786 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
5787
5788 mutex_lock(&kps->lock);
5789 memcpy(ps, &kps->channels, sizeof(*ps));
5790 mutex_unlock(&kps->lock);
5791 return 0;
5792}
5793
5794static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5795{
5796 int i;
5797 struct kvm_pit *pit = kvm->arch.vpit;
5798
5799 mutex_lock(&pit->pit_state.lock);
5800 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
5801 for (i = 0; i < 3; i++)
5802 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
5803 mutex_unlock(&pit->pit_state.lock);
5804 return 0;
5805}
5806
5807static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5808{
5809 mutex_lock(&kvm->arch.vpit->pit_state.lock);
5810 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
5811 sizeof(ps->channels));
5812 ps->flags = kvm->arch.vpit->pit_state.flags;
5813 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
5814 memset(&ps->reserved, 0, sizeof(ps->reserved));
5815 return 0;
5816}
5817
5818static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5819{
5820 int start = 0;
5821 int i;
5822 u32 prev_legacy, cur_legacy;
5823 struct kvm_pit *pit = kvm->arch.vpit;
5824
5825 mutex_lock(&pit->pit_state.lock);
5826 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
5827 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
5828 if (!prev_legacy && cur_legacy)
5829 start = 1;
5830 memcpy(&pit->pit_state.channels, &ps->channels,
5831 sizeof(pit->pit_state.channels));
5832 pit->pit_state.flags = ps->flags;
5833 for (i = 0; i < 3; i++)
5834 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
5835 start && i == 0);
5836 mutex_unlock(&pit->pit_state.lock);
5837 return 0;
5838}
5839
5840static int kvm_vm_ioctl_reinject(struct kvm *kvm,
5841 struct kvm_reinject_control *control)
5842{
5843 struct kvm_pit *pit = kvm->arch.vpit;
5844
5845
5846
5847
5848
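	/*
	 * pit_state.lock serializes concurrent KVM_REINJECT_CONTROL callers
	 * so userspace cannot observe a half-updated reinject setting.
	 */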
5849 mutex_lock(&pit->pit_state.lock);
5850 kvm_pit_set_reinject(pit, control->pit_reinject);
5851 mutex_unlock(&pit->pit_state.lock);
5852
5853 return 0;
5854}
5855
5856void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
5857{
5858
5859
5860
5861
5862
5863
5864
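	/*
	 * Dirty data buffered by hardware (e.g. PML) is flushed on VM-Exit,
	 * so kicking every vCPU is enough to sync the dirty log before it is
	 * reported to userspace.
	 */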
5865 struct kvm_vcpu *vcpu;
5866 unsigned long i;
5867
5868 kvm_for_each_vcpu(i, vcpu, kvm)
5869 kvm_vcpu_kick(vcpu);
5870}
5871
5872int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
5873 bool line_status)
5874{
5875 if (!irqchip_in_kernel(kvm))
5876 return -ENXIO;
5877
5878 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
5879 irq_event->irq, irq_event->level,
5880 line_status);
5881 return 0;
5882}
5883
5884int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
5885 struct kvm_enable_cap *cap)
5886{
5887 int r;
5888
5889 if (cap->flags)
5890 return -EINVAL;
5891
5892 switch (cap->cap) {
5893 case KVM_CAP_DISABLE_QUIRKS:
5894 kvm->arch.disabled_quirks = cap->args[0];
5895 r = 0;
5896 break;
5897 case KVM_CAP_SPLIT_IRQCHIP: {
5898 mutex_lock(&kvm->lock);
5899 r = -EINVAL;
5900 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
5901 goto split_irqchip_unlock;
5902 r = -EEXIST;
5903 if (irqchip_in_kernel(kvm))
5904 goto split_irqchip_unlock;
5905 if (kvm->created_vcpus)
5906 goto split_irqchip_unlock;
5907 r = kvm_setup_empty_irq_routing(kvm);
5908 if (r)
5909 goto split_irqchip_unlock;
5910
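		/* Publish the routing table before irqchip_mode exposes the split irqchip. */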
5911 smp_wmb();
5912 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
5913 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
5914 kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
5915 r = 0;
5916split_irqchip_unlock:
5917 mutex_unlock(&kvm->lock);
5918 break;
5919 }
5920 case KVM_CAP_X2APIC_API:
5921 r = -EINVAL;
5922 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
5923 break;
5924
5925 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
5926 kvm->arch.x2apic_format = true;
5927 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
5928 kvm->arch.x2apic_broadcast_quirk_disabled = true;
5929
5930 r = 0;
5931 break;
5932 case KVM_CAP_X86_DISABLE_EXITS:
5933 r = -EINVAL;
5934 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
5935 break;
5936
5937 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
5938 kvm_can_mwait_in_guest())
5939 kvm->arch.mwait_in_guest = true;
5940 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
5941 kvm->arch.hlt_in_guest = true;
5942 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
5943 kvm->arch.pause_in_guest = true;
5944 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
5945 kvm->arch.cstate_in_guest = true;
5946 r = 0;
5947 break;
5948 case KVM_CAP_MSR_PLATFORM_INFO:
5949 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
5950 r = 0;
5951 break;
5952 case KVM_CAP_EXCEPTION_PAYLOAD:
5953 kvm->arch.exception_payload_enabled = cap->args[0];
5954 r = 0;
5955 break;
5956 case KVM_CAP_X86_USER_SPACE_MSR:
5957 kvm->arch.user_space_msr_mask = cap->args[0];
5958 r = 0;
5959 break;
5960 case KVM_CAP_X86_BUS_LOCK_EXIT:
5961 r = -EINVAL;
5962 if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE)
5963 break;
5964
5965 if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) &&
5966 (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
5967 break;
5968
5969 if (kvm_has_bus_lock_exit &&
5970 cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
5971 kvm->arch.bus_lock_detection_enabled = true;
5972 r = 0;
5973 break;
5974#ifdef CONFIG_X86_SGX_KVM
5975 case KVM_CAP_SGX_ATTRIBUTE: {
5976 unsigned long allowed_attributes = 0;
5977
5978 r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
5979 if (r)
5980 break;
5981
5982
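		/* Only the PROVISIONKEY privileged attribute may be granted. */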
5983 if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
5984 !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
5985 kvm->arch.sgx_provisioning_allowed = true;
5986 else
5987 r = -EINVAL;
5988 break;
5989 }
5990#endif
5991 case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
5992 r = -EINVAL;
5993 if (kvm_x86_ops.vm_copy_enc_context_from)
5994 r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
5995 return r;
5996 case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
5997 r = -EINVAL;
5998 if (kvm_x86_ops.vm_move_enc_context_from)
5999 r = kvm_x86_ops.vm_move_enc_context_from(
6000 kvm, cap->args[0]);
6001 return r;
6002 case KVM_CAP_EXIT_HYPERCALL:
6003 if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
6004 r = -EINVAL;
6005 break;
6006 }
6007 kvm->arch.hypercall_exit_enabled = cap->args[0];
6008 r = 0;
6009 break;
6010 case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
6011 r = -EINVAL;
6012 if (cap->args[0] & ~1)
6013 break;
6014 kvm->arch.exit_on_emulation_error = cap->args[0];
6015 r = 0;
6016 break;
6017 default:
6018 r = -EINVAL;
6019 break;
6020 }
6021 return r;
6022}
6023
6024static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
6025{
6026 struct kvm_x86_msr_filter *msr_filter;
6027
6028 msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
6029 if (!msr_filter)
6030 return NULL;
6031
6032 msr_filter->default_allow = default_allow;
6033 return msr_filter;
6034}
6035
6036static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
6037{
6038 u32 i;
6039
6040 if (!msr_filter)
6041 return;
6042
6043 for (i = 0; i < msr_filter->count; i++)
6044 kfree(msr_filter->ranges[i].bitmap);
6045
6046 kfree(msr_filter);
6047}
6048
6049static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
6050 struct kvm_msr_filter_range *user_range)
6051{
6052 unsigned long *bitmap = NULL;
6053 size_t bitmap_size;
6054
6055 if (!user_range->nmsrs)
6056 return 0;
6057
6058 if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
6059 return -EINVAL;
6060
6061 if (!user_range->flags)
6062 return -EINVAL;
6063
6064 bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
6065 if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
6066 return -EINVAL;
6067
6068 bitmap = memdup_user((__user u8*)user_range->bitmap, bitmap_size);
6069 if (IS_ERR(bitmap))
6070 return PTR_ERR(bitmap);
6071
6072 msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
6073 .flags = user_range->flags,
6074 .base = user_range->base,
6075 .nmsrs = user_range->nmsrs,
6076 .bitmap = bitmap,
6077 };
6078
6079 msr_filter->count++;
6080 return 0;
6081}
6082
6083static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
6084{
6085 struct kvm_msr_filter __user *user_msr_filter = argp;
6086 struct kvm_x86_msr_filter *new_filter, *old_filter;
6087 struct kvm_msr_filter filter;
6088 bool default_allow;
6089 bool empty = true;
6090 int r = 0;
6091 u32 i;
6092
6093 if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
6094 return -EFAULT;
6095
6096 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
6097 empty &= !filter.ranges[i].nmsrs;
6098
6099 default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
6100 if (empty && !default_allow)
6101 return -EINVAL;
6102
6103 new_filter = kvm_alloc_msr_filter(default_allow);
6104 if (!new_filter)
6105 return -ENOMEM;
6106
6107 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
6108 r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
6109 if (r) {
6110 kvm_free_msr_filter(new_filter);
6111 return r;
6112 }
6113 }
6114
6115 mutex_lock(&kvm->lock);
6116
6117
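	/* Writers are serialized by kvm->lock, so no SRCU read lock is needed here. */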
6118 old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
6119
6120 rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
6121 synchronize_srcu(&kvm->srcu);
6122
6123 kvm_free_msr_filter(old_filter);
6124
6125 kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
6126 mutex_unlock(&kvm->lock);
6127
6128 return 0;
6129}
6130
6131#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
6132static int kvm_arch_suspend_notifier(struct kvm *kvm)
6133{
6134 struct kvm_vcpu *vcpu;
6135 unsigned long i;
6136 int ret = 0;
6137
6138 mutex_lock(&kvm->lock);
6139 kvm_for_each_vcpu(i, vcpu, kvm) {
6140 if (!vcpu->arch.pv_time_enabled)
6141 continue;
6142
6143 ret = kvm_set_guest_paused(vcpu);
6144 if (ret) {
6145 kvm_err("Failed to pause guest VCPU%d: %d\n",
6146 vcpu->vcpu_id, ret);
6147 break;
6148 }
6149 }
6150 mutex_unlock(&kvm->lock);
6151
6152 return ret ? NOTIFY_BAD : NOTIFY_DONE;
6153}
6154
6155int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
6156{
6157 switch (state) {
6158 case PM_HIBERNATION_PREPARE:
6159 case PM_SUSPEND_PREPARE:
6160 return kvm_arch_suspend_notifier(kvm);
6161 }
6162
6163 return NOTIFY_DONE;
6164}
6165#endif
6166
6167static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp)
6168{
6169 struct kvm_clock_data data = { 0 };
6170
6171 get_kvmclock(kvm, &data);
6172 if (copy_to_user(argp, &data, sizeof(data)))
6173 return -EFAULT;
6174
6175 return 0;
6176}
6177
6178static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp)
6179{
6180 struct kvm_arch *ka = &kvm->arch;
6181 struct kvm_clock_data data;
6182 u64 now_raw_ns;
6183
6184 if (copy_from_user(&data, argp, sizeof(data)))
6185 return -EFAULT;
6186
6187
6188
6189
6190
6191 if (data.flags & ~KVM_CLOCK_VALID_FLAGS)
6192 return -EINVAL;
6193
6194 kvm_hv_invalidate_tsc_page(kvm);
6195 kvm_start_pvclock_update(kvm);
6196 pvclock_update_vm_gtod_copy(kvm);
6197
6198
6199
6200
6201
6202
6203
6204
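	/*
	 * If userspace provided a host realtime reference, advance the clock
	 * by the wall-clock time elapsed since it was captured, but never
	 * step the clock backwards.
	 */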
6205 if (data.flags & KVM_CLOCK_REALTIME) {
6206 u64 now_real_ns = ktime_get_real_ns();
6207
6208
6209
6210
6211 if (now_real_ns > data.realtime)
6212 data.clock += now_real_ns - data.realtime;
6213 }
6214
6215 if (ka->use_master_clock)
6216 now_raw_ns = ka->master_kernel_ns;
6217 else
6218 now_raw_ns = get_kvmclock_base_ns();
6219 ka->kvmclock_offset = data.clock - now_raw_ns;
6220 kvm_end_pvclock_update(kvm);
6221 return 0;
6222}
6223
6224long kvm_arch_vm_ioctl(struct file *filp,
6225 unsigned int ioctl, unsigned long arg)
6226{
6227 struct kvm *kvm = filp->private_data;
6228 void __user *argp = (void __user *)arg;
6229 int r = -ENOTTY;
6230
6231
6232
6233
6234
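	/*
	 * The union keeps the mutually exclusive PIT argument buffers from
	 * inflating the stack frame.
	 */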
6235 union {
6236 struct kvm_pit_state ps;
6237 struct kvm_pit_state2 ps2;
6238 struct kvm_pit_config pit_config;
6239 } u;
6240
6241 switch (ioctl) {
6242 case KVM_SET_TSS_ADDR:
6243 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
6244 break;
6245 case KVM_SET_IDENTITY_MAP_ADDR: {
6246 u64 ident_addr;
6247
6248 mutex_lock(&kvm->lock);
6249 r = -EINVAL;
6250 if (kvm->created_vcpus)
6251 goto set_identity_unlock;
6252 r = -EFAULT;
6253 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
6254 goto set_identity_unlock;
6255 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
6256set_identity_unlock:
6257 mutex_unlock(&kvm->lock);
6258 break;
6259 }
6260 case KVM_SET_NR_MMU_PAGES:
6261 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
6262 break;
6263 case KVM_GET_NR_MMU_PAGES:
6264 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
6265 break;
6266 case KVM_CREATE_IRQCHIP: {
6267 mutex_lock(&kvm->lock);
6268
6269 r = -EEXIST;
6270 if (irqchip_in_kernel(kvm))
6271 goto create_irqchip_unlock;
6272
6273 r = -EINVAL;
6274 if (kvm->created_vcpus)
6275 goto create_irqchip_unlock;
6276
6277 r = kvm_pic_init(kvm);
6278 if (r)
6279 goto create_irqchip_unlock;
6280
6281 r = kvm_ioapic_init(kvm);
6282 if (r) {
6283 kvm_pic_destroy(kvm);
6284 goto create_irqchip_unlock;
6285 }
6286
6287 r = kvm_setup_default_irq_routing(kvm);
6288 if (r) {
6289 kvm_ioapic_destroy(kvm);
6290 kvm_pic_destroy(kvm);
6291 goto create_irqchip_unlock;
6292 }
6293
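		/* Publish the default routing before irqchip_mode makes the irqchip visible. */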
6294 smp_wmb();
6295 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
6296 kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
6297 create_irqchip_unlock:
6298 mutex_unlock(&kvm->lock);
6299 break;
6300 }
6301 case KVM_CREATE_PIT:
6302 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
6303 goto create_pit;
6304 case KVM_CREATE_PIT2:
6305 r = -EFAULT;
6306 if (copy_from_user(&u.pit_config, argp,
6307 sizeof(struct kvm_pit_config)))
6308 goto out;
6309 create_pit:
6310 mutex_lock(&kvm->lock);
6311 r = -EEXIST;
6312 if (kvm->arch.vpit)
6313 goto create_pit_unlock;
6314 r = -ENOMEM;
6315 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
6316 if (kvm->arch.vpit)
6317 r = 0;
6318 create_pit_unlock:
6319 mutex_unlock(&kvm->lock);
6320 break;
6321 case KVM_GET_IRQCHIP: {
6322
6323 struct kvm_irqchip *chip;
6324
6325 chip = memdup_user(argp, sizeof(*chip));
6326 if (IS_ERR(chip)) {
6327 r = PTR_ERR(chip);
6328 goto out;
6329 }
6330
6331 r = -ENXIO;
6332 if (!irqchip_kernel(kvm))
6333 goto get_irqchip_out;
6334 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
6335 if (r)
6336 goto get_irqchip_out;
6337 r = -EFAULT;
6338 if (copy_to_user(argp, chip, sizeof(*chip)))
6339 goto get_irqchip_out;
6340 r = 0;
6341 get_irqchip_out:
6342 kfree(chip);
6343 break;
6344 }
6345 case KVM_SET_IRQCHIP: {
6346
6347 struct kvm_irqchip *chip;
6348
6349 chip = memdup_user(argp, sizeof(*chip));
6350 if (IS_ERR(chip)) {
6351 r = PTR_ERR(chip);
6352 goto out;
6353 }
6354
6355 r = -ENXIO;
6356 if (!irqchip_kernel(kvm))
6357 goto set_irqchip_out;
6358 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
6359 set_irqchip_out:
6360 kfree(chip);
6361 break;
6362 }
6363 case KVM_GET_PIT: {
6364 r = -EFAULT;
6365 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
6366 goto out;
6367 r = -ENXIO;
6368 if (!kvm->arch.vpit)
6369 goto out;
6370 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
6371 if (r)
6372 goto out;
6373 r = -EFAULT;
6374 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
6375 goto out;
6376 r = 0;
6377 break;
6378 }
6379 case KVM_SET_PIT: {
6380 r = -EFAULT;
6381 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
6382 goto out;
6383 mutex_lock(&kvm->lock);
6384 r = -ENXIO;
6385 if (!kvm->arch.vpit)
6386 goto set_pit_out;
6387 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
6388set_pit_out:
6389 mutex_unlock(&kvm->lock);
6390 break;
6391 }
6392 case KVM_GET_PIT2: {
6393 r = -ENXIO;
6394 if (!kvm->arch.vpit)
6395 goto out;
6396 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
6397 if (r)
6398 goto out;
6399 r = -EFAULT;
6400 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
6401 goto out;
6402 r = 0;
6403 break;
6404 }
6405 case KVM_SET_PIT2: {
6406 r = -EFAULT;
6407 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
6408 goto out;
6409 mutex_lock(&kvm->lock);
6410 r = -ENXIO;
6411 if (!kvm->arch.vpit)
6412 goto set_pit2_out;
6413 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
6414set_pit2_out:
6415 mutex_unlock(&kvm->lock);
6416 break;
6417 }
6418 case KVM_REINJECT_CONTROL: {
6419 struct kvm_reinject_control control;
6420 r = -EFAULT;
6421 if (copy_from_user(&control, argp, sizeof(control)))
6422 goto out;
6423 r = -ENXIO;
6424 if (!kvm->arch.vpit)
6425 goto out;
6426 r = kvm_vm_ioctl_reinject(kvm, &control);
6427 break;
6428 }
6429 case KVM_SET_BOOT_CPU_ID:
6430 r = 0;
6431 mutex_lock(&kvm->lock);
6432 if (kvm->created_vcpus)
6433 r = -EBUSY;
6434 else
6435 kvm->arch.bsp_vcpu_id = arg;
6436 mutex_unlock(&kvm->lock);
6437 break;
6438#ifdef CONFIG_KVM_XEN
6439 case KVM_XEN_HVM_CONFIG: {
6440 struct kvm_xen_hvm_config xhc;
6441 r = -EFAULT;
6442 if (copy_from_user(&xhc, argp, sizeof(xhc)))
6443 goto out;
6444 r = kvm_xen_hvm_config(kvm, &xhc);
6445 break;
6446 }
6447 case KVM_XEN_HVM_GET_ATTR: {
6448 struct kvm_xen_hvm_attr xha;
6449
6450 r = -EFAULT;
6451 if (copy_from_user(&xha, argp, sizeof(xha)))
6452 goto out;
6453 r = kvm_xen_hvm_get_attr(kvm, &xha);
6454 if (!r && copy_to_user(argp, &xha, sizeof(xha)))
6455 r = -EFAULT;
6456 break;
6457 }
6458 case KVM_XEN_HVM_SET_ATTR: {
6459 struct kvm_xen_hvm_attr xha;
6460
6461 r = -EFAULT;
6462 if (copy_from_user(&xha, argp, sizeof(xha)))
6463 goto out;
6464 r = kvm_xen_hvm_set_attr(kvm, &xha);
6465 break;
6466 }
6467#endif
6468 case KVM_SET_CLOCK:
6469 r = kvm_vm_ioctl_set_clock(kvm, argp);
6470 break;
6471 case KVM_GET_CLOCK:
6472 r = kvm_vm_ioctl_get_clock(kvm, argp);
6473 break;
6474 case KVM_MEMORY_ENCRYPT_OP: {
6475 r = -ENOTTY;
6476 if (kvm_x86_ops.mem_enc_op)
6477 r = static_call(kvm_x86_mem_enc_op)(kvm, argp);
6478 break;
6479 }
6480 case KVM_MEMORY_ENCRYPT_REG_REGION: {
6481 struct kvm_enc_region region;
6482
6483 r = -EFAULT;
6484		if (copy_from_user(&region, argp, sizeof(region)))
6485 goto out;
6486
6487 r = -ENOTTY;
6488 if (kvm_x86_ops.mem_enc_reg_region)
6489			r = static_call(kvm_x86_mem_enc_reg_region)(kvm, &region);
6490 break;
6491 }
6492 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
6493 struct kvm_enc_region region;
6494
6495 r = -EFAULT;
6496		if (copy_from_user(&region, argp, sizeof(region)))
6497 goto out;
6498
6499 r = -ENOTTY;
6500 if (kvm_x86_ops.mem_enc_unreg_region)
6501			r = static_call(kvm_x86_mem_enc_unreg_region)(kvm, &region);
6502 break;
6503 }
6504 case KVM_HYPERV_EVENTFD: {
6505 struct kvm_hyperv_eventfd hvevfd;
6506
6507 r = -EFAULT;
6508 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
6509 goto out;
6510 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
6511 break;
6512 }
6513 case KVM_SET_PMU_EVENT_FILTER:
6514 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
6515 break;
6516 case KVM_X86_SET_MSR_FILTER:
6517 r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
6518 break;
6519 default:
6520 r = -ENOTTY;
6521 }
6522out:
6523 return r;
6524}
6525
6526static void kvm_init_msr_list(void)
6527{
6528 struct x86_pmu_capability x86_pmu;
6529 u32 dummy[2];
6530 unsigned i;
6531
6532 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
6533			 "Please update the fixed PMCs in msrs_to_save_all[]");
6534
6535 perf_get_x86_pmu_capability(&x86_pmu);
6536
6537 num_msrs_to_save = 0;
6538 num_emulated_msrs = 0;
6539 num_msr_based_features = 0;
6540
6541 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
6542 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
6543 continue;
6544
6545
6546
6547
6548
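		/*
		 * Even MSRs the host can access may not be safe or meaningful
		 * to expose to guests; filter them per feature below.
		 */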
6549 switch (msrs_to_save_all[i]) {
6550 case MSR_IA32_BNDCFGS:
6551 if (!kvm_mpx_supported())
6552 continue;
6553 break;
6554 case MSR_TSC_AUX:
6555 if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
6556 !kvm_cpu_cap_has(X86_FEATURE_RDPID))
6557 continue;
6558 break;
6559 case MSR_IA32_UMWAIT_CONTROL:
6560 if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
6561 continue;
6562 break;
6563 case MSR_IA32_RTIT_CTL:
6564 case MSR_IA32_RTIT_STATUS:
6565 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
6566 continue;
6567 break;
6568 case MSR_IA32_RTIT_CR3_MATCH:
6569 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6570 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
6571 continue;
6572 break;
6573 case MSR_IA32_RTIT_OUTPUT_BASE:
6574 case MSR_IA32_RTIT_OUTPUT_MASK:
6575 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6576 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
6577 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
6578 continue;
6579 break;
6580 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
6581 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
6582 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
6583 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
6584 continue;
6585 break;
6586 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
6587 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
6588 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
6589 continue;
6590 break;
6591 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
6592 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
6593 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
6594 continue;
6595 break;
6596 case MSR_IA32_XFD:
6597 case MSR_IA32_XFD_ERR:
6598 if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
6599 continue;
6600 break;
6601 default:
6602 break;
6603 }
6604
6605 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
6606 }
6607
6608 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
6609 if (!static_call(kvm_x86_has_emulated_msr)(NULL, emulated_msrs_all[i]))
6610 continue;
6611
6612 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
6613 }
6614
6615 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
6616 struct kvm_msr_entry msr;
6617
6618 msr.index = msr_based_features_all[i];
6619 if (kvm_get_msr_feature(&msr))
6620 continue;
6621
6622 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
6623 }
6624}
6625
6626static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
6627 const void *v)
6628{
6629 int handled = 0;
6630 int n;
6631
6632 do {
6633 n = min(len, 8);
6634 if (!(lapic_in_kernel(vcpu) &&
6635 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
6636 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
6637 break;
6638 handled += n;
6639 addr += n;
6640 len -= n;
6641 v += n;
6642 } while (len);
6643
6644 return handled;
6645}
6646
6647static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
6648{
6649 int handled = 0;
6650 int n;
6651
6652 do {
6653 n = min(len, 8);
6654 if (!(lapic_in_kernel(vcpu) &&
6655 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
6656 addr, n, v))
6657 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
6658 break;
6659 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
6660 handled += n;
6661 addr += n;
6662 len -= n;
6663 v += n;
6664 } while (len);
6665
6666 return handled;
6667}
6668
6669static void kvm_set_segment(struct kvm_vcpu *vcpu,
6670 struct kvm_segment *var, int seg)
6671{
6672 static_call(kvm_x86_set_segment)(vcpu, var, seg);
6673}
6674
6675void kvm_get_segment(struct kvm_vcpu *vcpu,
6676 struct kvm_segment *var, int seg)
6677{
6678 static_call(kvm_x86_get_segment)(vcpu, var, seg);
6679}
6680
6681gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
6682 struct x86_exception *exception)
6683{
6684 struct kvm_mmu *mmu = vcpu->arch.mmu;
6685 gpa_t t_gpa;
6686
6687 BUG_ON(!mmu_is_nested(vcpu));
6688
	/* Nested (L2 -> L1) GPA translations are always user-mode accesses. */
6690 access |= PFERR_USER_MASK;
6691 t_gpa = mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
6692
6693 return t_gpa;
6694}
6695
6696gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
6697 struct x86_exception *exception)
6698{
6699 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6700
6701 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6702 return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
6703}
6704EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
6705
6706gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
6707 struct x86_exception *exception)
6708{
6709 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6710
6711 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6712 access |= PFERR_FETCH_MASK;
6713 return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
6714}
6715
6716gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
6717 struct x86_exception *exception)
6718{
6719 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6720
6721 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6722 access |= PFERR_WRITE_MASK;
6723 return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
6724}
6725EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
6726
/* Translate without any CPL-based access checks (system/supervisor access). */
6728gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
6729 struct x86_exception *exception)
6730{
6731 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6732
6733 return mmu->gva_to_gpa(vcpu, mmu, gva, 0, exception);
6734}
6735
6736static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
6737 struct kvm_vcpu *vcpu, u32 access,
6738 struct x86_exception *exception)
6739{
6740 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6741 void *data = val;
6742 int r = X86EMUL_CONTINUE;
6743
6744 while (bytes) {
6745 gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
6746 unsigned offset = addr & (PAGE_SIZE-1);
6747 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
6748 int ret;
6749
6750 if (gpa == UNMAPPED_GVA)
6751 return X86EMUL_PROPAGATE_FAULT;
6752 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
6753 offset, toread);
6754 if (ret < 0) {
6755 r = X86EMUL_IO_NEEDED;
6756 goto out;
6757 }
6758
6759 bytes -= toread;
6760 data += toread;
6761 addr += toread;
6762 }
6763out:
6764 return r;
6765}
6766
6767
6768static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
6769 gva_t addr, void *val, unsigned int bytes,
6770 struct x86_exception *exception)
6771{
6772 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6773 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6774 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6775 unsigned offset;
6776 int ret;
6777
	/*
	 * Single-page fast path: an instruction fetch is not expected to cross
	 * a page boundary (see the WARN below), so one translation and one
	 * guest-page read suffice.
	 */
6779 gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access|PFERR_FETCH_MASK,
6780 exception);
6781 if (unlikely(gpa == UNMAPPED_GVA))
6782 return X86EMUL_PROPAGATE_FAULT;
6783
6784 offset = addr & (PAGE_SIZE-1);
6785 if (WARN_ON(offset + bytes > PAGE_SIZE))
6786 bytes = (unsigned)PAGE_SIZE - offset;
6787 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
6788 offset, bytes);
6789 if (unlikely(ret < 0))
6790 return X86EMUL_IO_NEEDED;
6791
6792 return X86EMUL_CONTINUE;
6793}
6794
6795int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
6796 gva_t addr, void *val, unsigned int bytes,
6797 struct x86_exception *exception)
6798{
6799 u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
6800
	/*
	 * Zero the whole exception struct up front so that callers, which may
	 * consume it even on failure, never see stale stack data.
	 */
6807 memset(exception, 0, sizeof(*exception));
6808 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
6809 exception);
6810}
6811EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
6812
6813static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
6814 gva_t addr, void *val, unsigned int bytes,
6815 struct x86_exception *exception, bool system)
6816{
6817 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6818 u32 access = 0;
6819
6820 if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
6821 access |= PFERR_USER_MASK;
6822
6823 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
6824}
6825
6826static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
6827 unsigned long addr, void *val, unsigned int bytes)
6828{
6829 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6830 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
6831
6832 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
6833}
6834
6835static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
6836 struct kvm_vcpu *vcpu, u32 access,
6837 struct x86_exception *exception)
6838{
6839 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6840 void *data = val;
6841 int r = X86EMUL_CONTINUE;
6842
6843 while (bytes) {
6844 gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
6845 unsigned offset = addr & (PAGE_SIZE-1);
6846 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
6847 int ret;
6848
6849 if (gpa == UNMAPPED_GVA)
6850 return X86EMUL_PROPAGATE_FAULT;
6851 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
6852 if (ret < 0) {
6853 r = X86EMUL_IO_NEEDED;
6854 goto out;
6855 }
6856
6857 bytes -= towrite;
6858 data += towrite;
6859 addr += towrite;
6860 }
6861out:
6862 return r;
6863}
6864
6865static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
6866 unsigned int bytes, struct x86_exception *exception,
6867 bool system)
6868{
6869 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6870 u32 access = PFERR_WRITE_MASK;
6871
6872 if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
6873 access |= PFERR_USER_MASK;
6874
6875 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6876 access, exception);
6877}
6878
6879int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
6880 unsigned int bytes, struct x86_exception *exception)
6881{
	/* Writing guest memory may touch many host pages; flag an L1D flush. */
6883 vcpu->arch.l1tf_flush_l1d = true;
6884
6885 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6886 PFERR_WRITE_MASK, exception);
6887}
6888EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
6889
6890static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type,
6891 void *insn, int insn_len)
6892{
6893 return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type,
6894 insn, insn_len);
6895}
6896
6897int handle_ud(struct kvm_vcpu *vcpu)
6898{
6899 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
6900 int emul_type = EMULTYPE_TRAP_UD;
6901 char sig[5];
6902 struct x86_exception e;
6903
6904 if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0)))
6905 return 1;
6906
6907 if (force_emulation_prefix &&
6908 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
6909 sig, sizeof(sig), &e) == 0 &&
6910 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
6911 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
6912 emul_type = EMULTYPE_TRAP_UD_FORCED;
6913 }
6914
6915 return kvm_emulate_instruction(vcpu, emul_type);
6916}
6917EXPORT_SYMBOL_GPL(handle_ud);
6918
6919static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6920 gpa_t gpa, bool write)
6921{
	/* Accesses to the default APIC base are always treated as MMIO. */
6923 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6924 return 1;
6925
6926 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
6927 trace_vcpu_match_mmio(gva, gpa, write, true);
6928 return 1;
6929 }
6930
6931 return 0;
6932}
6933
6934static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6935 gpa_t *gpa, struct x86_exception *exception,
6936 bool write)
6937{
6938 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
6939 u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
6940 | (write ? PFERR_WRITE_MASK : 0);
6941
	/*
	 * Fast path: if the GVA matches the cached MMIO translation and the
	 * cached access bits still pass the permission check, reuse the
	 * cached GFN instead of walking the guest page tables.
	 */
6947 if (vcpu_match_mmio_gva(vcpu, gva) && (!is_paging(vcpu) ||
6948 !permission_fault(vcpu, vcpu->arch.walk_mmu,
6949 vcpu->arch.mmio_access, 0, access))) {
6950 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
6951 (gva & (PAGE_SIZE - 1));
6952 trace_vcpu_match_mmio(gva, *gpa, write, false);
6953 return 1;
6954 }
6955
6956 *gpa = mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
6957
6958 if (*gpa == UNMAPPED_GVA)
6959 return -1;
6960
6961 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
6962}
6963
6964int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
6965 const void *val, int bytes)
6966{
6967 int ret;
6968
6969 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
6970 if (ret < 0)
6971 return 0;
6972 kvm_page_track_write(vcpu, gpa, val, bytes);
6973 return 1;
6974}
6975
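/*
 * Per-direction callbacks used by emulator_read_write() so that MMIO reads
 * and writes can share the page-splitting and fragment bookkeeping logic.
 */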
6976struct read_write_emulator_ops {
6977 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
6978 int bytes);
6979 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
6980 void *val, int bytes);
6981 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6982 int bytes, void *val);
6983 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6984 void *val, int bytes);
6985 bool write;
6986};
6987
6988static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
6989{
6990 if (vcpu->mmio_read_completed) {
6991 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
6992 vcpu->mmio_fragments[0].gpa, val);
6993 vcpu->mmio_read_completed = 0;
6994 return 1;
6995 }
6996
6997 return 0;
6998}
6999
7000static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
7001 void *val, int bytes)
7002{
7003 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
7004}
7005
7006static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
7007 void *val, int bytes)
7008{
7009 return emulator_write_phys(vcpu, gpa, val, bytes);
7010}
7011
7012static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
7013{
7014 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
7015 return vcpu_mmio_write(vcpu, gpa, bytes, val);
7016}
7017
7018static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
7019 void *val, int bytes)
7020{
7021 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
7022 return X86EMUL_IO_NEEDED;
7023}
7024
7025static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
7026 void *val, int bytes)
7027{
7028 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
7029
7030 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
7031 return X86EMUL_CONTINUE;
7032}
7033
7034static const struct read_write_emulator_ops read_emultor = {
7035 .read_write_prepare = read_prepare,
7036 .read_write_emulate = read_emulate,
7037 .read_write_mmio = vcpu_mmio_read,
7038 .read_write_exit_mmio = read_exit_mmio,
7039};
7040
7041static const struct read_write_emulator_ops write_emultor = {
7042 .read_write_emulate = write_emulate,
7043 .read_write_mmio = write_mmio,
7044 .read_write_exit_mmio = write_exit_mmio,
7045 .write = true,
7046};
7047
7048static int emulator_read_write_onepage(unsigned long addr, void *val,
7049 unsigned int bytes,
7050 struct x86_exception *exception,
7051 struct kvm_vcpu *vcpu,
7052 const struct read_write_emulator_ops *ops)
7053{
7054 gpa_t gpa;
7055 int handled, ret;
7056 bool write = ops->write;
7057 struct kvm_mmio_fragment *frag;
7058 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7059
	/*
	 * If the exit provided a GPA (e.g. a nested page fault) and it maps
	 * the same page offset as @addr, reuse it and skip the GVA->GPA walk.
	 * This cannot be used for string instructions, which only record the
	 * GPA of the initial access.
	 */
7067 if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
7068 (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
7069 gpa = ctxt->gpa_val;
7070 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
7071 } else {
7072 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
7073 if (ret < 0)
7074 return X86EMUL_PROPAGATE_FAULT;
7075 }
7076
7077 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
7078 return X86EMUL_CONTINUE;
7079
	/*
	 * Not ordinary guest RAM; see if the MMIO access can be handled
	 * in-kernel before falling back to a userspace exit.
	 */
7083 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
7084 if (handled == bytes)
7085 return X86EMUL_CONTINUE;
7086
7087 gpa += handled;
7088 bytes -= handled;
7089 val += handled;
7090
7091 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
7092 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
7093 frag->gpa = gpa;
7094 frag->data = val;
7095 frag->len = bytes;
7096 return X86EMUL_CONTINUE;
7097}
7098
7099static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
7100 unsigned long addr,
7101 void *val, unsigned int bytes,
7102 struct x86_exception *exception,
7103 const struct read_write_emulator_ops *ops)
7104{
7105 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7106 gpa_t gpa;
7107 int rc;
7108
7109 if (ops->read_write_prepare &&
7110 ops->read_write_prepare(vcpu, val, bytes))
7111 return X86EMUL_CONTINUE;
7112
7113 vcpu->mmio_nr_fragments = 0;
7114
	/* Does the access cross a page boundary?  If so, split it. */
7116 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
7117 int now;
7118
7119 now = -addr & ~PAGE_MASK;
7120 rc = emulator_read_write_onepage(addr, val, now, exception,
7121 vcpu, ops);
7122
7123 if (rc != X86EMUL_CONTINUE)
7124 return rc;
7125 addr += now;
7126 if (ctxt->mode != X86EMUL_MODE_PROT64)
7127 addr = (u32)addr;
7128 val += now;
7129 bytes -= now;
7130 }
7131
7132 rc = emulator_read_write_onepage(addr, val, bytes, exception,
7133 vcpu, ops);
7134 if (rc != X86EMUL_CONTINUE)
7135 return rc;
7136
7137 if (!vcpu->mmio_nr_fragments)
7138 return rc;
7139
7140 gpa = vcpu->mmio_fragments[0].gpa;
7141
7142 vcpu->mmio_needed = 1;
7143 vcpu->mmio_cur_fragment = 0;
7144
7145 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
7146 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
7147 vcpu->run->exit_reason = KVM_EXIT_MMIO;
7148 vcpu->run->mmio.phys_addr = gpa;
7149
7150 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
7151}
7152
7153static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
7154 unsigned long addr,
7155 void *val,
7156 unsigned int bytes,
7157 struct x86_exception *exception)
7158{
7159 return emulator_read_write(ctxt, addr, val, bytes,
7160 exception, &read_emultor);
7161}
7162
7163static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
7164 unsigned long addr,
7165 const void *val,
7166 unsigned int bytes,
7167 struct x86_exception *exception)
7168{
7169 return emulator_read_write(ctxt, addr, (void *)val, bytes,
7170 exception, &write_emultor);
7171}
7172
7173#define CMPXCHG_TYPE(t, ptr, old, new) \
7174 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
7175
7176#ifdef CONFIG_X86_64
7177# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
7178#else
7179# define CMPXCHG64(ptr, old, new) \
7180 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
7181#endif
7182
7183static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
7184 unsigned long addr,
7185 const void *old,
7186 const void *new,
7187 unsigned int bytes,
7188 struct x86_exception *exception)
7189{
7190 struct kvm_host_map map;
7191 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7192 u64 page_line_mask;
7193 gpa_t gpa;
7194 char *kaddr;
7195 bool exchanged;
7196
	/* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */
7198 if (bytes > 8 || (bytes & (bytes - 1)))
7199 goto emul_write;
7200
7201 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
7202
7203 if (gpa == UNMAPPED_GVA ||
7204 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
7205 goto emul_write;
7206
	/*
	 * The host cmpxchg below is only atomic within one mapped page, and
	 * within one cache line when split-lock detection is enabled; fall
	 * back to a plain emulated write if the access crosses that boundary.
	 */
7211 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
7212 page_line_mask = ~(cache_line_size() - 1);
7213 else
7214 page_line_mask = PAGE_MASK;
7215
7216 if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
7217 goto emul_write;
7218
7219 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
7220 goto emul_write;
7221
7222 kaddr = map.hva + offset_in_page(gpa);
7223
7224 switch (bytes) {
7225 case 1:
7226 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
7227 break;
7228 case 2:
7229 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
7230 break;
7231 case 4:
7232 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
7233 break;
7234 case 8:
7235 exchanged = CMPXCHG64(kaddr, old, new);
7236 break;
7237 default:
7238 BUG();
7239 }
7240
7241 kvm_vcpu_unmap(vcpu, &map, true);
7242
7243 if (!exchanged)
7244 return X86EMUL_CMPXCHG_FAILED;
7245
7246 kvm_page_track_write(vcpu, gpa, new, bytes);
7247
7248 return X86EMUL_CONTINUE;
7249
7250emul_write:
7251 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
7252
7253 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
7254}
7255
7256static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
7257{
7258 int r = 0, i;
7259
7260 for (i = 0; i < vcpu->arch.pio.count; i++) {
7261 if (vcpu->arch.pio.in)
7262 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
7263 vcpu->arch.pio.size, pd);
7264 else
7265 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
7266 vcpu->arch.pio.port, vcpu->arch.pio.size,
7267 pd);
7268 if (r)
7269 break;
7270 pd += vcpu->arch.pio.size;
7271 }
7272 return r;
7273}
7274
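/*
 * Record the PIO request and try to complete it against the in-kernel
 * KVM_PIO_BUS devices.  Returns 1 if fully handled in the kernel, 0 if a
 * KVM_EXIT_IO exit to userspace is required (kvm_run is filled in here).
 */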
7275static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
7276 unsigned short port,
7277 unsigned int count, bool in)
7278{
7279 vcpu->arch.pio.port = port;
7280 vcpu->arch.pio.in = in;
7281 vcpu->arch.pio.count = count;
7282 vcpu->arch.pio.size = size;
7283
7284 if (!kernel_pio(vcpu, vcpu->arch.pio_data))
7285 return 1;
7286
7287 vcpu->run->exit_reason = KVM_EXIT_IO;
7288 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
7289 vcpu->run->io.size = size;
7290 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
7291 vcpu->run->io.count = count;
7292 vcpu->run->io.port = port;
7293
7294 return 0;
7295}
7296
7297static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
7298 unsigned short port, unsigned int count)
7299{
7300 WARN_ON(vcpu->arch.pio.count);
7301 memset(vcpu->arch.pio_data, 0, size * count);
7302 return emulator_pio_in_out(vcpu, size, port, count, true);
7303}
7304
7305static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
7306{
7307 int size = vcpu->arch.pio.size;
7308 unsigned count = vcpu->arch.pio.count;
7309 memcpy(val, vcpu->arch.pio_data, size * count);
7310 trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
7311 vcpu->arch.pio.count = 0;
7312}
7313
7314static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
7315 unsigned short port, void *val, unsigned int count)
7316{
7317 if (vcpu->arch.pio.count) {
		/*
		 * A previous PIO-in exited to userspace and its data is now in
		 * pio_data; fall through and copy it out.  @count is ignored
		 * here, the completion uses the originally recorded pio.count.
		 */
7325 } else {
7326 int r = __emulator_pio_in(vcpu, size, port, count);
7327 if (!r)
7328 return r;
7329
7330
7331 }
7332
7333 complete_emulator_pio_in(vcpu, val);
7334 return 1;
7335}
7336
7337static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
7338 int size, unsigned short port, void *val,
7339 unsigned int count)
7340{
7341 return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
7342
7343}
7344
7345static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
7346 unsigned short port, const void *val,
7347 unsigned int count)
7348{
7349 int ret;
7350
7351 memcpy(vcpu->arch.pio_data, val, size * count);
7352 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
7353 ret = emulator_pio_in_out(vcpu, size, port, count, false);
7354 if (ret)
7355 vcpu->arch.pio.count = 0;
7356
7357 return ret;
7358}
7359
7360static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
7361 int size, unsigned short port,
7362 const void *val, unsigned int count)
7363{
7364 return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
7365}
7366
7367static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
7368{
7369 return static_call(kvm_x86_get_segment_base)(vcpu, seg);
7370}
7371
7372static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
7373{
7374 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
7375}
7376
7377static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
7378{
7379 if (!need_emulate_wbinvd(vcpu))
7380 return X86EMUL_CONTINUE;
7381
7382 if (static_call(kvm_x86_has_wbinvd_exit)()) {
7383 int cpu = get_cpu();
7384
7385 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
7386 on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
7387 wbinvd_ipi, NULL, 1);
7388 put_cpu();
7389 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
7390 } else
7391 wbinvd();
7392 return X86EMUL_CONTINUE;
7393}
7394
7395int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
7396{
7397 kvm_emulate_wbinvd_noskip(vcpu);
7398 return kvm_skip_emulated_instruction(vcpu);
7399}
7400EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
7401
7402
7403
7404static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
7405{
7406 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
7407}
7408
7409static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
7410 unsigned long *dest)
7411{
7412 kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
7413}
7414
7415static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
7416 unsigned long value)
7417{
7418
7419 return kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
7420}
7421
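/* Merge a new 32-bit value into the low half of a control register. */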
7422static u64 mk_cr_64(u64 curr_cr, u32 new_val)
7423{
7424 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
7425}
7426
7427static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
7428{
7429 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7430 unsigned long value;
7431
7432 switch (cr) {
7433 case 0:
7434 value = kvm_read_cr0(vcpu);
7435 break;
7436 case 2:
7437 value = vcpu->arch.cr2;
7438 break;
7439 case 3:
7440 value = kvm_read_cr3(vcpu);
7441 break;
7442 case 4:
7443 value = kvm_read_cr4(vcpu);
7444 break;
7445 case 8:
7446 value = kvm_get_cr8(vcpu);
7447 break;
7448 default:
7449 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7450 return 0;
7451 }
7452
7453 return value;
7454}
7455
7456static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
7457{
7458 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7459 int res = 0;
7460
7461 switch (cr) {
7462 case 0:
7463 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
7464 break;
7465 case 2:
7466 vcpu->arch.cr2 = val;
7467 break;
7468 case 3:
7469 res = kvm_set_cr3(vcpu, val);
7470 break;
7471 case 4:
7472 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
7473 break;
7474 case 8:
7475 res = kvm_set_cr8(vcpu, val);
7476 break;
7477 default:
7478 kvm_err("%s: unexpected cr %u\n", __func__, cr);
7479 res = -1;
7480 }
7481
7482 return res;
7483}
7484
7485static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
7486{
7487 return static_call(kvm_x86_get_cpl)(emul_to_vcpu(ctxt));
7488}
7489
7490static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7491{
7492 static_call(kvm_x86_get_gdt)(emul_to_vcpu(ctxt), dt);
7493}
7494
7495static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7496{
7497 static_call(kvm_x86_get_idt)(emul_to_vcpu(ctxt), dt);
7498}
7499
7500static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7501{
7502 static_call(kvm_x86_set_gdt)(emul_to_vcpu(ctxt), dt);
7503}
7504
7505static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
7506{
7507 static_call(kvm_x86_set_idt)(emul_to_vcpu(ctxt), dt);
7508}
7509
7510static unsigned long emulator_get_cached_segment_base(
7511 struct x86_emulate_ctxt *ctxt, int seg)
7512{
7513 return get_segment_base(emul_to_vcpu(ctxt), seg);
7514}
7515
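/*
 * Convert KVM's kvm_segment representation into the emulator's desc_struct
 * (plus the upper base bits); returns false for an unusable (NULL) segment.
 */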
7516static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
7517 struct desc_struct *desc, u32 *base3,
7518 int seg)
7519{
7520 struct kvm_segment var;
7521
7522 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
7523 *selector = var.selector;
7524
7525 if (var.unusable) {
7526 memset(desc, 0, sizeof(*desc));
7527 if (base3)
7528 *base3 = 0;
7529 return false;
7530 }
7531
7532 if (var.g)
7533 var.limit >>= 12;
7534 set_desc_limit(desc, var.limit);
7535 set_desc_base(desc, (unsigned long)var.base);
7536#ifdef CONFIG_X86_64
7537 if (base3)
7538 *base3 = var.base >> 32;
7539#endif
7540 desc->type = var.type;
7541 desc->s = var.s;
7542 desc->dpl = var.dpl;
7543 desc->p = var.present;
7544 desc->avl = var.avl;
7545 desc->l = var.l;
7546 desc->d = var.db;
7547 desc->g = var.g;
7548
7549 return true;
7550}
7551
7552static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
7553 struct desc_struct *desc, u32 base3,
7554 int seg)
7555{
7556 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7557 struct kvm_segment var;
7558
7559 var.selector = selector;
7560 var.base = get_desc_base(desc);
7561#ifdef CONFIG_X86_64
7562 var.base |= ((u64)base3) << 32;
7563#endif
7564 var.limit = get_desc_limit(desc);
7565 if (desc->g)
7566 var.limit = (var.limit << 12) | 0xfff;
7567 var.type = desc->type;
7568 var.dpl = desc->dpl;
7569 var.db = desc->d;
7570 var.s = desc->s;
7571 var.l = desc->l;
7572 var.g = desc->g;
7573 var.avl = desc->avl;
7574 var.present = desc->p;
7575 var.unusable = !var.present;
7576 var.padding = 0;
7577
7578 kvm_set_segment(vcpu, &var, seg);
7579 return;
7580}
7581
7582static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
7583 u32 msr_index, u64 *pdata)
7584{
7585 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7586 int r;
7587
7588 r = kvm_get_msr(vcpu, msr_index, pdata);
7589
7590 if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0,
7591 complete_emulated_rdmsr, r)) {
		/* Bounce the MSR read to userspace. */
7593 return X86EMUL_IO_NEEDED;
7594 }
7595
7596 return r;
7597}
7598
7599static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
7600 u32 msr_index, u64 data)
7601{
7602 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7603 int r;
7604
7605 r = kvm_set_msr(vcpu, msr_index, data);
7606
7607 if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data,
7608 complete_emulated_msr_access, r)) {
		/* Bounce the MSR write to userspace. */
7610 return X86EMUL_IO_NEEDED;
7611 }
7612
7613 return r;
7614}
7615
7616static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
7617{
7618 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7619
7620 return vcpu->arch.smbase;
7621}
7622
7623static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
7624{
7625 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7626
7627 vcpu->arch.smbase = smbase;
7628}
7629
7630static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
7631 u32 pmc)
7632{
7633 if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
7634 return 0;
7635 return -EINVAL;
7636}
7637
7638static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
7639 u32 pmc, u64 *pdata)
7640{
7641 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
7642}
7643
7644static void emulator_halt(struct x86_emulate_ctxt *ctxt)
7645{
7646 emul_to_vcpu(ctxt)->arch.halt_request = 1;
7647}
7648
7649static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
7650 struct x86_instruction_info *info,
7651 enum x86_intercept_stage stage)
7652{
7653 return static_call(kvm_x86_check_intercept)(emul_to_vcpu(ctxt), info, stage,
7654 &ctxt->exception);
7655}
7656
7657static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
7658 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
7659 bool exact_only)
7660{
7661 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
7662}
7663
7664static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
7665{
7666 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
7667}
7668
7669static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
7670{
7671 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
7672}
7673
7674static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
7675{
7676 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
7677}
7678
7679static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
7680{
7681 return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
7682}
7683
7684static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
7685{
7686 kvm_register_write_raw(emul_to_vcpu(ctxt), reg, val);
7687}
7688
7689static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
7690{
7691 static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
7692}
7693
7694static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
7695{
7696 return emul_to_vcpu(ctxt)->arch.hflags;
7697}
7698
7699static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
7700{
7701 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7702
7703 kvm_smm_changed(vcpu, false);
7704}
7705
7706static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
7707 const char *smstate)
7708{
7709 return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
7710}
7711
7712static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
7713{
7714 kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
7715}
7716
7717static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
7718{
7719 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
7720}
7721
7722static const struct x86_emulate_ops emulate_ops = {
7723 .read_gpr = emulator_read_gpr,
7724 .write_gpr = emulator_write_gpr,
7725 .read_std = emulator_read_std,
7726 .write_std = emulator_write_std,
7727 .read_phys = kvm_read_guest_phys_system,
7728 .fetch = kvm_fetch_guest_virt,
7729 .read_emulated = emulator_read_emulated,
7730 .write_emulated = emulator_write_emulated,
7731 .cmpxchg_emulated = emulator_cmpxchg_emulated,
7732 .invlpg = emulator_invlpg,
7733 .pio_in_emulated = emulator_pio_in_emulated,
7734 .pio_out_emulated = emulator_pio_out_emulated,
7735 .get_segment = emulator_get_segment,
7736 .set_segment = emulator_set_segment,
7737 .get_cached_segment_base = emulator_get_cached_segment_base,
7738 .get_gdt = emulator_get_gdt,
7739 .get_idt = emulator_get_idt,
7740 .set_gdt = emulator_set_gdt,
7741 .set_idt = emulator_set_idt,
7742 .get_cr = emulator_get_cr,
7743 .set_cr = emulator_set_cr,
7744 .cpl = emulator_get_cpl,
7745 .get_dr = emulator_get_dr,
7746 .set_dr = emulator_set_dr,
7747 .get_smbase = emulator_get_smbase,
7748 .set_smbase = emulator_set_smbase,
7749 .set_msr = emulator_set_msr,
7750 .get_msr = emulator_get_msr,
7751 .check_pmc = emulator_check_pmc,
7752 .read_pmc = emulator_read_pmc,
7753 .halt = emulator_halt,
7754 .wbinvd = emulator_wbinvd,
7755 .fix_hypercall = emulator_fix_hypercall,
7756 .intercept = emulator_intercept,
7757 .get_cpuid = emulator_get_cpuid,
7758 .guest_has_long_mode = emulator_guest_has_long_mode,
7759 .guest_has_movbe = emulator_guest_has_movbe,
7760 .guest_has_fxsr = emulator_guest_has_fxsr,
7761 .set_nmi_mask = emulator_set_nmi_mask,
7762 .get_hflags = emulator_get_hflags,
7763 .exiting_smm = emulator_exiting_smm,
7764 .leave_smm = emulator_leave_smm,
7765 .triple_fault = emulator_triple_fault,
7766 .set_xcr = emulator_set_xcr,
7767};
7768
7769static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
7770{
7771 u32 int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
7772
	/*
	 * The STI/MOV-SS interrupt shadow only lasts for a single instruction.
	 * If a shadow was already active before emulation, don't re-arm it
	 * with the new mask; and when clearing a previously-set shadow,
	 * request an event check so pending interrupts are re-evaluated.
	 */
7779 if (int_shadow & mask)
7780 mask = 0;
7781 if (unlikely(int_shadow || mask)) {
7782 static_call(kvm_x86_set_interrupt_shadow)(vcpu, mask);
7783 if (!mask)
7784 kvm_make_request(KVM_REQ_EVENT, vcpu);
7785 }
7786}
7787
7788static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
7789{
7790 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7791 if (ctxt->exception.vector == PF_VECTOR)
7792 return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
7793
7794 if (ctxt->exception.error_code_valid)
7795 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
7796 ctxt->exception.error_code);
7797 else
7798 kvm_queue_exception(vcpu, ctxt->exception.vector);
7799 return false;
7800}
7801
7802static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
7803{
7804 struct x86_emulate_ctxt *ctxt;
7805
7806 ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
7807 if (!ctxt) {
7808 pr_err("kvm: failed to allocate vcpu's emulator\n");
7809 return NULL;
7810 }
7811
7812 ctxt->vcpu = vcpu;
7813 ctxt->ops = &emulate_ops;
7814 vcpu->arch.emulate_ctxt = ctxt;
7815
7816 return ctxt;
7817}
7818
7819static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
7820{
7821 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7822 int cs_db, cs_l;
7823
7824 static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
7825
7826 ctxt->gpa_available = false;
7827 ctxt->eflags = kvm_get_rflags(vcpu);
7828 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
7829
7830 ctxt->eip = kvm_rip_read(vcpu);
7831 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
7832 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
7833 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
7834 cs_db ? X86EMUL_MODE_PROT32 :
7835 X86EMUL_MODE_PROT16;
7836 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
7837 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
7838 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
7839
7840 ctxt->interruptibility = 0;
7841 ctxt->have_exception = false;
7842 ctxt->exception.vector = -1;
7843 ctxt->perm_ok = false;
7844
7845 init_decode_cache(ctxt);
7846 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
7847}
7848
7849void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
7850{
7851 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7852 int ret;
7853
7854 init_emulate_ctxt(vcpu);
7855
7856 ctxt->op_bytes = 2;
7857 ctxt->ad_bytes = 2;
7858 ctxt->_eip = ctxt->eip + inc_eip;
7859 ret = emulate_int_real(ctxt, irq);
7860
7861 if (ret != X86EMUL_CONTINUE) {
7862 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
7863 } else {
7864 ctxt->eip = ctxt->_eip;
7865 kvm_rip_write(vcpu, ctxt->eip);
7866 kvm_set_rflags(vcpu, ctxt->eflags);
7867 }
7868}
7869EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
7870
7871static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
7872 u8 ndata, u8 *insn_bytes, u8 insn_size)
7873{
7874 struct kvm_run *run = vcpu->run;
7875 u64 info[5];
7876 u8 info_start;
7877
	/*
	 * Zero the whole exit-info array; the u32 casts below only fill part
	 * of some entries and would otherwise leave stale bytes behind.
	 */
7882 memset(&info, 0, sizeof(info));
7883
7884 static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1],
7885 &info[2], (u32 *)&info[3],
7886 (u32 *)&info[4]);
7887
7888 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7889 run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
7890
	/*
	 * kvm_run.emulation_failure only has room for a limited amount of
	 * auxiliary data, so cap the caller-supplied entries at four.
	 */
7896 if (WARN_ON_ONCE(ndata > 4))
7897 ndata = 4;
7898
7899
7900 info_start = 1;
7901 run->emulation_failure.flags = 0;
7902
7903 if (insn_size) {
7904 BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) +
7905 sizeof(run->emulation_failure.insn_bytes) != 16));
7906 info_start += 2;
7907 run->emulation_failure.flags |=
7908 KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
7909 run->emulation_failure.insn_size = insn_size;
7910 memset(run->emulation_failure.insn_bytes, 0x90,
7911 sizeof(run->emulation_failure.insn_bytes));
7912 memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size);
7913 }
7914
7915 memcpy(&run->internal.data[info_start], info, sizeof(info));
7916 memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data,
7917 ndata * sizeof(data[0]));
7918
7919 run->emulation_failure.ndata = info_start + ARRAY_SIZE(info) + ndata;
7920}
7921
7922static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu)
7923{
7924 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7925
7926 prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data,
7927 ctxt->fetch.end - ctxt->fetch.data);
7928}
7929
7930void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data,
7931 u8 ndata)
7932{
7933 prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0);
7934}
7935EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit);
7936
7937void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
7938{
7939 __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0);
7940}
7941EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit);
7942
7943static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
7944{
7945 struct kvm *kvm = vcpu->kvm;
7946
7947 ++vcpu->stat.insn_emulation_fail;
7948 trace_kvm_emulate_insn_failed(vcpu);
7949
7950 if (emulation_type & EMULTYPE_VMWARE_GP) {
7951 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
7952 return 1;
7953 }
7954
7955 if (kvm->arch.exit_on_emulation_error ||
7956 (emulation_type & EMULTYPE_SKIP)) {
7957 prepare_emulation_ctxt_failure_exit(vcpu);
7958 return 0;
7959 }
7960
7961 kvm_queue_exception(vcpu, UD_VECTOR);
7962
7963 if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
7964 prepare_emulation_ctxt_failure_exit(vcpu);
7965 return 0;
7966 }
7967
7968 return 1;
7969}
7970
7971static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
7972 bool write_fault_to_shadow_pgtable,
7973 int emulation_type)
7974{
7975 gpa_t gpa = cr2_or_gpa;
7976 kvm_pfn_t pfn;
7977
7978 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7979 return false;
7980
7981 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7982 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7983 return false;
7984
7985 if (!vcpu->arch.mmu->direct_map) {
		/*
		 * Translate with write permission; only write accesses are
		 * ever emulated here due to write-protected shadow pages.
		 */
7990 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
7991
7992
7993
7994
7995
7996 if (gpa == UNMAPPED_GVA)
7997 return true;
7998 }
7999
	/*
	 * Look up the host pfn; if the gfn has no usable memslot backing
	 * (error/noslot pfn), the fault cannot be fixed by retrying, so bail
	 * and let emulation report the failure.
	 */
8006 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
8007
8008
8009
8010
8011
8012 if (is_error_noslot_pfn(pfn))
8013 return false;
8014
8015 kvm_release_pfn_clean(pfn);
8016
8017
8018 if (vcpu->arch.mmu->direct_map) {
8019 unsigned int indirect_shadow_pages;
8020
8021 write_lock(&vcpu->kvm->mmu_lock);
8022 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
8023 write_unlock(&vcpu->kvm->mmu_lock);
8024
8025 if (indirect_shadow_pages)
8026 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
8027
8028 return true;
8029 }
8030
	/*
	 * Emulation was likely triggered by a write to a shadowed
	 * (write-protected) guest page table: unprotect the page and re-enter
	 * the guest so the CPU can execute the instruction directly.
	 */
8036 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
8037
	/*
	 * If the faulting access was itself a guest page-table walk,
	 * unprotecting the page won't help; report failure so the access is
	 * emulated instead.
	 */
8043 return !write_fault_to_shadow_pgtable;
8044}
8045
8046static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
8047 gpa_t cr2_or_gpa, int emulation_type)
8048{
8049 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
8050 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
8051
8052 last_retry_eip = vcpu->arch.last_retry_eip;
8053 last_retry_addr = vcpu->arch.last_retry_addr;
8054
8055
	/*
	 * If emulation was triggered by a #PF on a write-protected shadow
	 * page and the instruction does not itself modify a page table, the
	 * shadow page can simply be zapped and the instruction retried
	 * directly in the guest.
	 *
	 * The last retried eip/address pair is tracked so that the same
	 * instruction faulting on the same address twice in a row falls back
	 * to emulation instead of retrying forever.
	 */
8068 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
8069
8070 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
8071 return false;
8072
8073 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
8074 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
8075 return false;
8076
8077 if (x86_page_table_writing_insn(ctxt))
8078 return false;
8079
8080 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
8081 return false;
8082
8083 vcpu->arch.last_retry_eip = ctxt->eip;
8084 vcpu->arch.last_retry_addr = cr2_or_gpa;
8085
8086 if (!vcpu->arch.mmu->direct_map)
8087 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
8088
8089 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
8090
8091 return true;
8092}
8093
8094static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
8095static int complete_emulated_pio(struct kvm_vcpu *vcpu);
8096
8097static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
8098{
8099 trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
8100
8101 if (entering_smm) {
8102 vcpu->arch.hflags |= HF_SMM_MASK;
8103 } else {
8104 vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
8105
8106
8107 kvm_make_request(KVM_REQ_EVENT, vcpu);
8108
		/*
		 * Even if PDPTRs were provided out of band (KVM_SET_SREGS2),
		 * they must be reloaded from guest memory on SMM exit.
		 */
8114 vcpu->arch.pdptrs_from_userspace = false;
8115 }
8116
8117 kvm_mmu_reset_context(vcpu);
8118}
8119
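/*
 * Return the DR6 bits for every breakpoint in @dr7/@db that is enabled,
 * matches @type and is armed on @addr.
 */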
8120static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
8121 unsigned long *db)
8122{
8123 u32 dr6 = 0;
8124 int i;
8125 u32 enable, rwlen;
8126
8127 enable = dr7;
8128 rwlen = dr7 >> 16;
8129 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
8130 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
8131 dr6 |= (1 << i);
8132 return dr6;
8133}
8134
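/*
 * Deliver a single-step #DB: to userspace as KVM_EXIT_DEBUG when
 * KVM_GUESTDBG_SINGLESTEP is enabled, otherwise into the guest.
 */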
8135static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
8136{
8137 struct kvm_run *kvm_run = vcpu->run;
8138
8139 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
8140 kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW;
8141 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
8142 kvm_run->debug.arch.exception = DB_VECTOR;
8143 kvm_run->exit_reason = KVM_EXIT_DEBUG;
8144 return 0;
8145 }
8146 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
8147 return 1;
8148}
8149
8150int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
8151{
8152 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
8153 int r;
8154
8155 r = static_call(kvm_x86_skip_emulated_instruction)(vcpu);
8156 if (unlikely(!r))
8157 return 0;
8158
8159 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
8160
	/*
	 * Note, @rflags is the value read *before* the instruction was
	 * skipped.  Using it here is correct even when the guest itself set
	 * TF, because the CPU does not raise a single-step #DB after the
	 * instruction that sets TF.
	 */
8169 if (unlikely(rflags & X86_EFLAGS_TF))
8170 r = kvm_vcpu_do_singlestep(vcpu);
8171 return r;
8172}
8173EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
8174
8175static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
8176{
8177 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
8178 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
8179 struct kvm_run *kvm_run = vcpu->run;
8180 unsigned long eip = kvm_get_linear_rip(vcpu);
8181 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
8182 vcpu->arch.guest_debug_dr7,
8183 vcpu->arch.eff_db);
8184
8185 if (dr6 != 0) {
8186 kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
8187 kvm_run->debug.arch.pc = eip;
8188 kvm_run->debug.arch.exception = DB_VECTOR;
8189 kvm_run->exit_reason = KVM_EXIT_DEBUG;
8190 *r = 0;
8191 return true;
8192 }
8193 }
8194
8195 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
8196 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
8197 unsigned long eip = kvm_get_linear_rip(vcpu);
8198 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
8199 vcpu->arch.dr7,
8200 vcpu->arch.db);
8201
8202 if (dr6 != 0) {
8203 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
8204 *r = 1;
8205 return true;
8206 }
8207 }
8208
8209 return false;
8210}
8211
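/*
 * The VMware backdoor is reachable via IN/OUT/INS/OUTS (0xe4-0xe7, 0xec-0xef,
 * 0x6c-0x6f) and RDPMC (0x0f 0x33); only those opcodes may be emulated when
 * EMULTYPE_VMWARE_GP is set.
 */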
8212static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
8213{
8214 switch (ctxt->opcode_len) {
8215 case 1:
8216 switch (ctxt->b) {
8217 case 0xe4:
8218 case 0xe5:
8219 case 0xec:
8220 case 0xed:
8221 case 0xe6:
8222 case 0xe7:
8223 case 0xee:
8224 case 0xef:
8225 case 0x6c:
8226 case 0x6d:
8227 case 0x6e:
8228 case 0x6f:
8229 return true;
8230 }
8231 break;
8232 case 2:
8233 switch (ctxt->b) {
8234 case 0x33:
8235 return true;
8236 }
8237 break;
8238 }
8239
8240 return false;
8241}
8242
/*
 * Decode (but do not execute) the current instruction for emulation.  Code
 * breakpoints are checked here unless the caller passed EMULTYPE_SKIP.
 */
8246int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
8247 void *insn, int insn_len)
8248{
8249 int r = EMULATION_OK;
8250 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8251
8252 init_emulate_ctxt(vcpu);
8253
8254
8255
8256
8257
8258
8259 if (!(emulation_type & EMULTYPE_SKIP) &&
8260 kvm_vcpu_check_breakpoint(vcpu, &r))
8261 return r;
8262
8263 r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
8264
8265 trace_kvm_emulate_insn_start(vcpu);
8266 ++vcpu->stat.insn_emulation;
8267
8268 return r;
8269}
8270EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
8271
8272int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
8273 int emulation_type, void *insn, int insn_len)
8274{
8275 int r;
8276 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
8277 bool writeback = true;
8278 bool write_fault_to_spt;
8279
8280 if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
8281 return 1;
8282
8283 vcpu->arch.l1tf_flush_l1d = true;
8284
	/*
	 * Consume and clear write_fault_to_shadow_pgtable up front so that a
	 * stale value is never reused on a later emulation attempt.
	 */
8289 write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
8290 vcpu->arch.write_fault_to_shadow_pgtable = false;
8291
8292 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
8293 kvm_clear_exception_queue(vcpu);
8294
8295 r = x86_decode_emulated_instruction(vcpu, emulation_type,
8296 insn, insn_len);
8297 if (r != EMULATION_OK) {
8298 if ((emulation_type & EMULTYPE_TRAP_UD) ||
8299 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
8300 kvm_queue_exception(vcpu, UD_VECTOR);
8301 return 1;
8302 }
8303 if (reexecute_instruction(vcpu, cr2_or_gpa,
8304 write_fault_to_spt,
8305 emulation_type))
8306 return 1;
8307 if (ctxt->have_exception) {
				/*
				 * A #UD from decode should surface as
				 * EMULATION_FAILED, and trap-like exceptions
				 * are never expected at this point.
				 */
8312 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
8313 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
8314 inject_emulated_exception(vcpu);
8315 return 1;
8316 }
8317 return handle_emulation_failure(vcpu, emulation_type);
8318 }
8319 }
8320
8321 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
8322 !is_vmware_backdoor_opcode(ctxt)) {
8323 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
8324 return 1;
8325 }
8326
	/*
	 * EMULTYPE_SKIP only advances RIP past the decoded instruction; it is
	 * meant for the vendor kvm_skip_emulated_instruction() callbacks,
	 * which remain responsible for interruptibility state and any
	 * single-step #DB injection.
	 */
8333 if (emulation_type & EMULTYPE_SKIP) {
8334 if (ctxt->mode != X86EMUL_MODE_PROT64)
8335 ctxt->eip = (u32)ctxt->_eip;
8336 else
8337 ctxt->eip = ctxt->_eip;
8338
8339 if (emulation_type & EMULTYPE_COMPLETE_USER_EXIT) {
8340 r = 1;
8341 goto writeback;
8342 }
8343
8344 kvm_rip_write(vcpu, ctxt->eip);
8345 if (ctxt->eflags & X86_EFLAGS_RF)
8346 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
8347 return 1;
8348 }
8349
8350 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
8351 return 1;
8352
8353
8354
8355 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
8356 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
8357 emulator_invalidate_register_cache(ctxt);
8358 }
8359
8360restart:
8361 if (emulation_type & EMULTYPE_PF) {
8362
8363 ctxt->exception.address = cr2_or_gpa;
8364
8365
8366 if (vcpu->arch.mmu->direct_map) {
8367 ctxt->gpa_available = true;
8368 ctxt->gpa_val = cr2_or_gpa;
8369 }
8370 } else {
8371
8372 ctxt->exception.address = 0;
8373 }
8374
8375 r = x86_emulate_insn(ctxt);
8376
8377 if (r == EMULATION_INTERCEPTED)
8378 return 1;
8379
8380 if (r == EMULATION_FAILED) {
8381 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
8382 emulation_type))
8383 return 1;
8384
8385 return handle_emulation_failure(vcpu, emulation_type);
8386 }
8387
8388 if (ctxt->have_exception) {
8389 r = 1;
8390 if (inject_emulated_exception(vcpu))
8391 return r;
8392 } else if (vcpu->arch.pio.count) {
8393 if (!vcpu->arch.pio.in) {
8394
8395 vcpu->arch.pio.count = 0;
8396 } else {
8397 writeback = false;
8398 vcpu->arch.complete_userspace_io = complete_emulated_pio;
8399 }
8400 r = 0;
8401 } else if (vcpu->mmio_needed) {
8402 ++vcpu->stat.mmio_exits;
8403
8404 if (!vcpu->mmio_is_write)
8405 writeback = false;
8406 r = 0;
8407 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8408 } else if (vcpu->arch.complete_userspace_io) {
8409 writeback = false;
8410 r = 0;
8411 } else if (r == EMULATION_RESTART)
8412 goto restart;
8413 else
8414 r = 1;
8415
8416writeback:
8417 if (writeback) {
8418 unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
8419 toggle_interruptibility(vcpu, ctxt->interruptibility);
8420 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8421 if (!ctxt->have_exception ||
8422 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
8423 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
8424 if (ctxt->is_branch)
8425 kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
8426 kvm_rip_write(vcpu, ctxt->eip);
8427 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
8428 r = kvm_vcpu_do_singlestep(vcpu);
8429 if (kvm_x86_ops.update_emulated_instruction)
8430 static_call(kvm_x86_update_emulated_instruction)(vcpu);
8431 __kvm_set_rflags(vcpu, ctxt->eflags);
8432 }
8433
8434
		/*
		 * If emulation flipped IF from clear to set (e.g. STI, POPF),
		 * previously blocked interrupts may now be deliverable, so
		 * request an event re-check.
		 */
8440 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
8441 kvm_make_request(KVM_REQ_EVENT, vcpu);
8442 } else
8443 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
8444
8445 return r;
8446}
8447
8448int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
8449{
8450 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
8451}
8452EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
8453
8454int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
8455 void *insn, int insn_len)
8456{
8457 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
8458}
8459EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
8460
8461static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
8462{
8463 vcpu->arch.pio.count = 0;
8464 return 1;
8465}
8466
8467static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
8468{
8469 vcpu->arch.pio.count = 0;
8470
8471 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
8472 return 1;
8473
8474 return kvm_skip_emulated_instruction(vcpu);
8475}
8476
8477static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
8478 unsigned short port)
8479{
8480 unsigned long val = kvm_rax_read(vcpu);
8481 int ret = emulator_pio_out(vcpu, size, port, &val, 1);
8482
8483 if (ret)
8484 return ret;
8485
	/*
	 * Quirk: some userspace expects RIP to already point past an
	 * "OUT 0x7e" when the exit is delivered, so skip the instruction
	 * before exiting in that case.
	 */
8490 if (port == 0x7e &&
8491 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
8492 vcpu->arch.complete_userspace_io =
8493 complete_fast_pio_out_port_0x7e;
8494 kvm_skip_emulated_instruction(vcpu);
8495 } else {
8496 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8497 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
8498 }
8499 return 0;
8500}
8501
8502static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
8503{
8504 unsigned long val;
8505
8506
8507 BUG_ON(vcpu->arch.pio.count != 1);
8508
8509 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
8510 vcpu->arch.pio.count = 0;
8511 return 1;
8512 }
8513
8514
8515 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
8516
	/*
	 * pio.count is 1 here, so emulator_pio_in() both performs the copy
	 * into &val and handles the tracing.
	 */
8521 emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
8522 kvm_rax_write(vcpu, val);
8523
8524 return kvm_skip_emulated_instruction(vcpu);
8525}
8526
8527static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
8528 unsigned short port)
8529{
8530 unsigned long val;
8531 int ret;
8532
8533
8534 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
8535
8536 ret = emulator_pio_in(vcpu, size, port, &val, 1);
8537 if (ret) {
8538 kvm_rax_write(vcpu, val);
8539 return ret;
8540 }
8541
8542 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
8543 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
8544
8545 return 0;
8546}
8547
8548int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
8549{
8550 int ret;
8551
8552 if (in)
8553 ret = kvm_fast_pio_in(vcpu, size, port);
8554 else
8555 ret = kvm_fast_pio_out(vcpu, size, port);
8556 return ret && kvm_skip_emulated_instruction(vcpu);
8557}
8558EXPORT_SYMBOL_GPL(kvm_fast_pio);
8559
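/* CPU is going offline: invalidate its cached TSC frequency. */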
8560static int kvmclock_cpu_down_prep(unsigned int cpu)
8561{
8562 __this_cpu_write(cpu_tsc_khz, 0);
8563 return 0;
8564}
8565
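/*
 * Refresh this CPU's cached TSC frequency: use the cpufreq notifier data if
 * provided, otherwise query cpufreq (non-constant TSC only), falling back to
 * the boot-time tsc_khz.
 */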
8566static void tsc_khz_changed(void *data)
8567{
8568 struct cpufreq_freqs *freq = data;
8569 unsigned long khz = 0;
8570
8571 if (data)
8572 khz = freq->new;
8573 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8574 khz = cpufreq_quick_get(raw_smp_processor_id());
8575 if (!khz)
8576 khz = tsc_khz;
8577 __this_cpu_write(cpu_tsc_khz, khz);
8578}
8579
8580#ifdef CONFIG_X86_64
8581static void kvm_hyperv_tsc_notifier(void)
8582{
8583 struct kvm *kvm;
8584 int cpu;
8585
8586 mutex_lock(&kvm_lock);
8587 list_for_each_entry(kvm, &vm_list, vm_list)
8588 kvm_make_mclock_inprogress_request(kvm);
8589
	/* No guest entries happen past this point. */
8591 hyperv_stop_tsc_emulation();
8592
	/* The TSC frequency always matches the host's when running on Hyper-V. */
8594 for_each_present_cpu(cpu)
8595 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
8596 kvm_max_guest_tsc_khz = tsc_khz;
8597
8598 list_for_each_entry(kvm, &vm_list, vm_list) {
8599 __kvm_start_pvclock_update(kvm);
8600 pvclock_update_vm_gtod_copy(kvm);
8601 kvm_end_pvclock_update(kvm);
8602 }
8603
8604 mutex_unlock(&kvm_lock);
8605}
8606#endif
8607
8608static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
8609{
8610 struct kvm *kvm;
8611 struct kvm_vcpu *vcpu;
8612 int send_ipi = 0;
8613 unsigned long i;
8614
	/*
	 * Guests may keep running on a clock that is slowing down as long as
	 * they are notified afterwards, and on a clock that is speeding up as
	 * long as they are notified beforehand; either way guest time never
	 * goes backwards.
	 *
	 * The per-CPU cpu_tsc_khz value cannot be updated directly from this
	 * notifier, which may run on any CPU and can race with CPU hotplug.
	 * All updates are therefore funnelled through a synchronous IPI
	 * (tsc_khz_changed) to the target CPU, which totally orders them, and
	 * every affected vCPU gets a KVM_REQ_CLOCK_UPDATE so its kvmclock
	 * parameters are recomputed with the new frequency before it next
	 * enters the guest.
	 */
8654 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
8655
8656 mutex_lock(&kvm_lock);
8657 list_for_each_entry(kvm, &vm_list, vm_list) {
8658 kvm_for_each_vcpu(i, vcpu, kvm) {
8659 if (vcpu->cpu != cpu)
8660 continue;
8661 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8662 if (vcpu->cpu != raw_smp_processor_id())
8663 send_ipi = 1;
8664 }
8665 }
8666 mutex_unlock(&kvm_lock);
8667
8668 if (freq->old < freq->new && send_ipi) {
8669
		/*
		 * The frequency went up.  A vCPU on the affected CPU may be in
		 * guest context and could observe stale (slower) kvmclock
		 * values, i.e. time going backwards.  Send another IPI to kick
		 * it out of the guest; kvmclock is refreshed on the next
		 * entry.
		 */
8681 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
8682 }
8683}
8684
8685static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
8686 void *data)
8687{
8688 struct cpufreq_freqs *freq = data;
8689 int cpu;
8690
8691 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
8692 return 0;
8693 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
8694 return 0;
8695
8696 for_each_cpu(cpu, freq->policy->cpus)
8697 __kvmclock_cpufreq_notifier(freq, cpu);
8698
8699 return 0;
8700}
8701
8702static struct notifier_block kvmclock_cpufreq_notifier_block = {
8703 .notifier_call = kvmclock_cpufreq_notifier
8704};
8705
8706static int kvmclock_cpu_online(unsigned int cpu)
8707{
8708 tsc_khz_changed(NULL);
8709 return 0;
8710}
8711
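/*
 * Record the maximum host TSC frequency and keep the per-CPU cpu_tsc_khz
 * cache current via cpufreq notifications (when the TSC is not constant) and
 * CPU hotplug callbacks.
 */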
8712static void kvm_timer_init(void)
8713{
8714 max_tsc_khz = tsc_khz;
8715
8716 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
8717#ifdef CONFIG_CPU_FREQ
8718 struct cpufreq_policy *policy;
8719 int cpu;
8720
8721 cpu = get_cpu();
8722 policy = cpufreq_cpu_get(cpu);
8723 if (policy) {
8724 if (policy->cpuinfo.max_freq)
8725 max_tsc_khz = policy->cpuinfo.max_freq;
8726 cpufreq_cpu_put(policy);
8727 }
8728 put_cpu();
8729#endif
8730 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
8731 CPUFREQ_TRANSITION_NOTIFIER);
8732 }
8733
8734 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
8735 kvmclock_cpu_online, kvmclock_cpu_down_prep);
8736}
8737
8738#ifdef CONFIG_X86_64
8739static void pvclock_gtod_update_fn(struct work_struct *work)
8740{
8741 struct kvm *kvm;
8742 struct kvm_vcpu *vcpu;
8743 unsigned long i;
8744
8745 mutex_lock(&kvm_lock);
8746 list_for_each_entry(kvm, &vm_list, vm_list)
8747 kvm_for_each_vcpu(i, vcpu, kvm)
8748 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
8749 atomic_set(&kvm_guest_has_master_clock, 0);
8750 mutex_unlock(&kvm_lock);
8751}
8752
8753static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
8754
/*
 * Indirection through irq_work: pvclock_gtod_notify() runs with the
 * timekeeper sequence count held, so queue_work() is deferred here to avoid
 * deadlocking against time accessors invoked with work-related locks held.
 */
8760static void pvclock_irq_work_fn(struct irq_work *w)
8761{
8762 queue_work(system_long_wq, &pvclock_gtod_work);
8763}
8764
8765static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
8766
8767
8768
8769
8770static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
8771 void *priv)
8772{
8773 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
8774 struct timekeeper *tk = priv;
8775
8776 update_pvclock_gtod(tk);
8777
	/*
	 * Disable the master clock if the host stops using a TSC-based
	 * clocksource; defer the work through irq_work since this runs with
	 * the timekeeper sequence count held for writing.
	 */
8783 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
8784 atomic_read(&kvm_guest_has_master_clock) != 0)
8785 irq_work_queue(&pvclock_irq_work);
8786 return 0;
8787}
8788
8789static struct notifier_block pvclock_gtod_notifier = {
8790 .notifier_call = pvclock_gtod_notify,
8791};
8792#endif
8793
8794int kvm_arch_init(void *opaque)
8795{
8796 struct kvm_x86_init_ops *ops = opaque;
8797 int r;
8798
8799 if (kvm_x86_ops.hardware_enable) {
8800 pr_err("kvm: already loaded vendor module '%s'\n", kvm_x86_ops.name);
8801 r = -EEXIST;
8802 goto out;
8803 }
8804
8805 if (!ops->cpu_has_kvm_support()) {
8806 pr_err_ratelimited("kvm: no hardware support for '%s'\n",
8807 ops->runtime_ops->name);
8808 r = -EOPNOTSUPP;
8809 goto out;
8810 }
8811 if (ops->disabled_by_bios()) {
8812 pr_err_ratelimited("kvm: support for '%s' disabled by bios\n",
8813 ops->runtime_ops->name);
8814 r = -EOPNOTSUPP;
8815 goto out;
8816 }
8817
	/*
	 * KVM assumes the host has an FPU with FXSAVE/FXRSTOR, e.g. KVM_GET_FPU
	 * hands guest FPU state to userspace as an fxregs_state image.
	 */
8823 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
8824 printk(KERN_ERR "kvm: inadequate fpu\n");
8825 r = -EOPNOTSUPP;
8826 goto out;
8827 }
8828
8829 r = -ENOMEM;
8830
8831 x86_emulator_cache = kvm_alloc_emulator_cache();
8832 if (!x86_emulator_cache) {
8833 pr_err("kvm: failed to allocate cache for x86 emulator\n");
8834 goto out;
8835 }
8836
8837 user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
8838 if (!user_return_msrs) {
8839 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
8840 goto out_free_x86_emulator_cache;
8841 }
8842 kvm_nr_uret_msrs = 0;
8843
8844 r = kvm_mmu_module_init();
8845 if (r)
8846 goto out_free_percpu;
8847
8848 kvm_timer_init();
8849
8850 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
8851 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
8852 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
8853 }
8854
8855 if (pi_inject_timer == -1)
8856 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
8857#ifdef CONFIG_X86_64
8858 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
8859
8860 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
8861 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
8862#endif
8863
8864 return 0;
8865
8866out_free_percpu:
8867 free_percpu(user_return_msrs);
8868out_free_x86_emulator_cache:
8869 kmem_cache_destroy(x86_emulator_cache);
8870out:
8871 return r;
8872}
8873
8874void kvm_arch_exit(void)
8875{
8876#ifdef CONFIG_X86_64
8877 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
8878 clear_hv_tscchange_cb();
8879#endif
8880 kvm_lapic_exit();
8881
8882 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
8883 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
8884 CPUFREQ_TRANSITION_NOTIFIER);
8885 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
8886#ifdef CONFIG_X86_64
8887 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
8888 irq_work_sync(&pvclock_irq_work);
8889 cancel_work_sync(&pvclock_gtod_work);
8890#endif
8891 kvm_x86_ops.hardware_enable = NULL;
8892 kvm_mmu_module_exit();
8893 free_percpu(user_return_msrs);
8894 kmem_cache_destroy(x86_emulator_cache);
8895#ifdef CONFIG_KVM_XEN
8896 static_key_deferred_flush(&kvm_xen_enabled);
8897 WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
8898#endif
8899}
8900
8901static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
8902{
	/*
	 * The vCPU has halted, e.g. executed HLT.  Update the run state if the
	 * local APIC is in-kernel, the run loop will detect the non-runnable
	 * state and halt the vCPU.  Exit to userspace if the local APIC is
	 * managed by userspace, in which case userspace is responsible for
	 * handling wake events.
	 */
8910 ++vcpu->stat.halt_exits;
8911 if (lapic_in_kernel(vcpu)) {
8912 vcpu->arch.mp_state = state;
8913 return 1;
8914 } else {
8915 vcpu->run->exit_reason = reason;
8916 return 0;
8917 }
8918}
8919
8920int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
8921{
8922 return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
8923}
8924EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
8925
8926int kvm_emulate_halt(struct kvm_vcpu *vcpu)
8927{
8928 int ret = kvm_skip_emulated_instruction(vcpu);
8929
	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
8933 return kvm_emulate_halt_noskip(vcpu) && ret;
8934}
8935EXPORT_SYMBOL_GPL(kvm_emulate_halt);
8936
8937int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
8938{
8939 int ret = kvm_skip_emulated_instruction(vcpu);
8940
8941 return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
8942 KVM_EXIT_AP_RESET_HOLD) && ret;
8943}
8944EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
8945
8946#ifdef CONFIG_X86_64
8947static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
8948 unsigned long clock_type)
8949{
8950 struct kvm_clock_pairing clock_pairing;
8951 struct timespec64 ts;
8952 u64 cycle;
8953 int ret;
8954
8955 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
8956 return -KVM_EOPNOTSUPP;
8957
	/*
	 * When tsc is in permanent catchup mode guests won't be able to use
	 * pvclock_read_retry loop to get consistent view of pvclock
	 */
8962 if (vcpu->arch.tsc_always_catchup)
8963 return -KVM_EOPNOTSUPP;
8964
8965 if (!kvm_get_walltime_and_clockread(&ts, &cycle))
8966 return -KVM_EOPNOTSUPP;
8967
8968 clock_pairing.sec = ts.tv_sec;
8969 clock_pairing.nsec = ts.tv_nsec;
8970 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
8971 clock_pairing.flags = 0;
8972 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
8973
8974 ret = 0;
8975 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
8976 sizeof(struct kvm_clock_pairing)))
8977 ret = -KVM_EFAULT;
8978
8979 return ret;
8980}
8981#endif
8982
/*
 * kvm_pv_kick_cpu_op:  Kick a vcpu.
 *
 * @apicid - apicid of vcpu to be kicked.
 */
8988static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
8989{
8990 struct kvm_lapic_irq lapic_irq;
8991
8992 lapic_irq.shorthand = APIC_DEST_NOSHORT;
8993 lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
8994 lapic_irq.level = 0;
8995 lapic_irq.dest_id = apicid;
8996 lapic_irq.msi_redir_hint = false;
8997
8998 lapic_irq.delivery_mode = APIC_DM_REMRD;
8999 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
9000}
9001
9002bool kvm_apicv_activated(struct kvm *kvm)
9003{
9004 return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
9005}
9006EXPORT_SYMBOL_GPL(kvm_apicv_activated);
9007
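/*
 * Set up the per-VM APIC virtualization state: APICv starts out inhibited
 * (APICV_INHIBIT_REASON_ABSENT) until vendor code clears that reason, and is
 * additionally inhibited when the module-level enable_apicv knob is off.
 */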
9008static void kvm_apicv_init(struct kvm *kvm)
9009{
9010 init_rwsem(&kvm->arch.apicv_update_lock);
9011
9012 set_bit(APICV_INHIBIT_REASON_ABSENT,
9013 &kvm->arch.apicv_inhibit_reasons);
9014 if (!enable_apicv)
9015 set_bit(APICV_INHIBIT_REASON_DISABLE,
9016 &kvm->arch.apicv_inhibit_reasons);
9017}
9018
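/*
 * Directed yield in response to a PV kick or PV sched_yield hypercall: look
 * up the destination vCPU by physical APIC ID and yield to it, unless it is
 * not ready or is the calling vCPU itself.
 */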
9019static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
9020{
9021 struct kvm_vcpu *target = NULL;
9022 struct kvm_apic_map *map;
9023
9024 vcpu->stat.directed_yield_attempted++;
9025
9026 if (single_task_running())
9027 goto no_yield;
9028
9029 rcu_read_lock();
9030 map = rcu_dereference(vcpu->kvm->arch.apic_map);
9031
9032 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
9033 target = map->phys_map[dest_id]->vcpu;
9034
9035 rcu_read_unlock();
9036
9037 if (!target || !READ_ONCE(target->ready))
9038 goto no_yield;
9039
	/* Ignore requests to yield to self */
9041 if (vcpu == target)
9042 goto no_yield;
9043
9044 if (kvm_vcpu_yield_to(target) <= 0)
9045 goto no_yield;
9046
9047 vcpu->stat.directed_yield_successful++;
9048
9049no_yield:
9050 return;
9051}
9052
9053static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
9054{
9055 u64 ret = vcpu->run->hypercall.ret;
9056
9057 if (!is_64_bit_mode(vcpu))
9058 ret = (u32)ret;
9059 kvm_rax_write(vcpu, ret);
9060 ++vcpu->stat.hypercalls;
9061 return kvm_skip_emulated_instruction(vcpu);
9062}
9063
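/*
 * Emulate a KVM hypercall.  The hypercall number is taken from RAX and up to
 * four arguments from RBX, RCX, RDX and RSI; the result is written back to
 * RAX, and only CPL0 guests are allowed to call.  A guest-side invocation
 * looks roughly like the sketch below (illustrative only, not part of this
 * file; Intel uses VMCALL, AMD uses VMMCALL):
 *
 *	asm volatile("vmcall"
 *		     : "=a" (ret)
 *		     : "a" (KVM_HC_KICK_CPU), "b" (flags), "c" (apicid)
 *		     : "memory");
 */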
9064int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
9065{
9066 unsigned long nr, a0, a1, a2, a3, ret;
9067 int op_64_bit;
9068
9069 if (kvm_xen_hypercall_enabled(vcpu->kvm))
9070 return kvm_xen_hypercall(vcpu);
9071
9072 if (kvm_hv_hypercall_enabled(vcpu))
9073 return kvm_hv_hypercall(vcpu);
9074
9075 nr = kvm_rax_read(vcpu);
9076 a0 = kvm_rbx_read(vcpu);
9077 a1 = kvm_rcx_read(vcpu);
9078 a2 = kvm_rdx_read(vcpu);
9079 a3 = kvm_rsi_read(vcpu);
9080
9081 trace_kvm_hypercall(nr, a0, a1, a2, a3);
9082
9083 op_64_bit = is_64_bit_hypercall(vcpu);
9084 if (!op_64_bit) {
9085 nr &= 0xFFFFFFFF;
9086 a0 &= 0xFFFFFFFF;
9087 a1 &= 0xFFFFFFFF;
9088 a2 &= 0xFFFFFFFF;
9089 a3 &= 0xFFFFFFFF;
9090 }
9091
9092 if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
9093 ret = -KVM_EPERM;
9094 goto out;
9095 }
9096
9097 ret = -KVM_ENOSYS;
9098
9099 switch (nr) {
9100 case KVM_HC_VAPIC_POLL_IRQ:
9101 ret = 0;
9102 break;
9103 case KVM_HC_KICK_CPU:
9104 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
9105 break;
9106
9107 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
9108 kvm_sched_yield(vcpu, a1);
9109 ret = 0;
9110 break;
9111#ifdef CONFIG_X86_64
9112 case KVM_HC_CLOCK_PAIRING:
9113 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
9114 break;
9115#endif
9116 case KVM_HC_SEND_IPI:
9117 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
9118 break;
9119
9120 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
9121 break;
9122 case KVM_HC_SCHED_YIELD:
9123 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
9124 break;
9125
9126 kvm_sched_yield(vcpu, a0);
9127 ret = 0;
9128 break;
9129 case KVM_HC_MAP_GPA_RANGE: {
9130 u64 gpa = a0, npages = a1, attrs = a2;
9131
9132 ret = -KVM_ENOSYS;
9133 if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
9134 break;
9135
9136 if (!PAGE_ALIGNED(gpa) || !npages ||
9137 gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
9138 ret = -KVM_EINVAL;
9139 break;
9140 }
9141
9142 vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
9143 vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
9144 vcpu->run->hypercall.args[0] = gpa;
9145 vcpu->run->hypercall.args[1] = npages;
9146 vcpu->run->hypercall.args[2] = attrs;
9147 vcpu->run->hypercall.longmode = op_64_bit;
9148 vcpu->arch.complete_userspace_io = complete_hypercall_exit;
9149 return 0;
9150 }
9151 default:
9152 ret = -KVM_ENOSYS;
9153 break;
9154 }
9155out:
9156 if (!op_64_bit)
9157 ret = (u32)ret;
9158 kvm_rax_write(vcpu, ret);
9159
9160 ++vcpu->stat.hypercalls;
9161 return kvm_skip_emulated_instruction(vcpu);
9162}
9163EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
9164
9165static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
9166{
9167 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
9168 char instruction[3];
9169 unsigned long rip = kvm_rip_read(vcpu);
9170
9171 static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
9172
9173 return emulator_write_emulated(ctxt, rip, instruction, 3,
9174 &ctxt->exception);
9175}
9176
9177static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
9178{
9179 return vcpu->run->request_interrupt_window &&
9180 likely(!pic_in_kernel(vcpu->kvm));
9181}
9182
9183
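/*
 * Mirror volatile vCPU state (IF flag, CR8, APIC base, SMM flag and whether
 * an interrupt can be injected right now) into the shared kvm_run structure
 * before returning to userspace.
 */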
9184static void post_kvm_run_save(struct kvm_vcpu *vcpu)
9185{
9186 struct kvm_run *kvm_run = vcpu->run;
9187
9188 kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
9189 kvm_run->cr8 = kvm_get_cr8(vcpu);
9190 kvm_run->apic_base = kvm_get_apic_base(vcpu);
9191
9192 kvm_run->ready_for_interrupt_injection =
9193 pic_in_kernel(vcpu->kvm) ||
9194 kvm_vcpu_ready_for_interrupt_injection(vcpu);
9195
9196 if (is_smm(vcpu))
9197 kvm_run->flags |= KVM_RUN_X86_SMM;
9198}
9199
9200static void update_cr8_intercept(struct kvm_vcpu *vcpu)
9201{
9202 int max_irr, tpr;
9203
9204 if (!kvm_x86_ops.update_cr8_intercept)
9205 return;
9206
9207 if (!lapic_in_kernel(vcpu))
9208 return;
9209
9210 if (vcpu->arch.apicv_active)
9211 return;
9212
9213 if (!vcpu->arch.apic->vapic_addr)
9214 max_irr = kvm_lapic_find_highest_irr(vcpu);
9215 else
9216 max_irr = -1;
9217
9218 if (max_irr != -1)
9219 max_irr >>= 4;
9220
9221 tpr = kvm_lapic_get_cr8(vcpu);
9222
9223 static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
9224}
9225
9226
9227int kvm_check_nested_events(struct kvm_vcpu *vcpu)
9228{
9229 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
9230 kvm_x86_ops.nested_ops->triple_fault(vcpu);
9231 return 1;
9232 }
9233
9234 return kvm_x86_ops.nested_ops->check_events(vcpu);
9235}
9236
9237static void kvm_inject_exception(struct kvm_vcpu *vcpu)
9238{
9239 if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
9240 vcpu->arch.exception.error_code = false;
9241 static_call(kvm_x86_queue_exception)(vcpu);
9242}
9243
9244static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
9245{
9246 int r;
9247 bool can_inject = true;
9248
	/* try to reinject previous events if any */
9251 if (vcpu->arch.exception.injected) {
9252 kvm_inject_exception(vcpu);
9253 can_inject = false;
9254 }
	/*
	 * Do not inject an NMI or interrupt if there is a pending
	 * exception.  Exceptions and interrupts are recognized at
	 * instruction boundaries, i.e. the start of an instruction.
	 * Trap-like exceptions, e.g. #DB, have higher priority than
	 * NMIs and interrupts, i.e. traps are recognized before an
	 * NMI/interrupt that's pending on the same instruction, whereas
	 * faults are recognized as part of instruction execution, i.e.
	 * they're both below the priority of a pending NMI/interrupt.
	 * A pending fault-like exception means the fault occurred on the
	 * *previous* instruction and must be serviced before recognizing
	 * any new events.
	 */
9269 else if (!vcpu->arch.exception.pending) {
9270 if (vcpu->arch.nmi_injected) {
9271 static_call(kvm_x86_set_nmi)(vcpu);
9272 can_inject = false;
9273 } else if (vcpu->arch.interrupt.injected) {
9274 static_call(kvm_x86_set_irq)(vcpu);
9275 can_inject = false;
9276 }
9277 }
9278
9279 WARN_ON_ONCE(vcpu->arch.exception.injected &&
9280 vcpu->arch.exception.pending);
9281
	/*
	 * Call check_nested_events() even if we reinjected a previous event
	 * in order for the caller to determine if it should require an
	 * immediate exit from L2 to L1 due to pending L1 events which
	 * require an exit from L2 to L1.
	 */
9288 if (is_guest_mode(vcpu)) {
9289 r = kvm_check_nested_events(vcpu);
9290 if (r < 0)
9291 goto out;
9292 }
9293
	/* try to inject new event if pending */
9295 if (vcpu->arch.exception.pending) {
9296 trace_kvm_inj_exception(vcpu->arch.exception.nr,
9297 vcpu->arch.exception.has_error_code,
9298 vcpu->arch.exception.error_code);
9299
9300 vcpu->arch.exception.pending = false;
9301 vcpu->arch.exception.injected = true;
9302
9303 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
9304 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
9305 X86_EFLAGS_RF);
9306
9307 if (vcpu->arch.exception.nr == DB_VECTOR) {
9308 kvm_deliver_exception_payload(vcpu);
9309 if (vcpu->arch.dr7 & DR7_GD) {
9310 vcpu->arch.dr7 &= ~DR7_GD;
9311 kvm_update_dr7(vcpu);
9312 }
9313 }
9314
9315 kvm_inject_exception(vcpu);
9316 can_inject = false;
9317 }
9318
	/* Don't inject interrupts if the user asked to avoid doing so */
9320 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
9321 return 0;
9322
	/*
	 * Finally, inject interrupt events.  If an event cannot be injected
	 * due to architectural conditions (e.g. IF=0) a window-open exit
	 * will re-request KVM_REQ_EVENT.  Sometimes however an event is
	 * pending and can architecturally be injected, but we cannot do it
	 * right now: an interrupt could have arrived just now and we have to
	 * inject it as a vmexit, or there could already be an event in the
	 * queue, which is indicated by can_inject.  In that case we request
	 * an immediate exit in order to make progress and get back here for
	 * another iteration.  The kvm_x86_ops hooks communicate this by
	 * returning -EBUSY.
	 */
9334 if (vcpu->arch.smi_pending) {
9335 r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
9336 if (r < 0)
9337 goto out;
9338 if (r) {
9339 vcpu->arch.smi_pending = false;
9340 ++vcpu->arch.smi_count;
9341 enter_smm(vcpu);
9342 can_inject = false;
9343 } else
9344 static_call(kvm_x86_enable_smi_window)(vcpu);
9345 }
9346
9347 if (vcpu->arch.nmi_pending) {
9348 r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
9349 if (r < 0)
9350 goto out;
9351 if (r) {
9352 --vcpu->arch.nmi_pending;
9353 vcpu->arch.nmi_injected = true;
9354 static_call(kvm_x86_set_nmi)(vcpu);
9355 can_inject = false;
9356 WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
9357 }
9358 if (vcpu->arch.nmi_pending)
9359 static_call(kvm_x86_enable_nmi_window)(vcpu);
9360 }
9361
9362 if (kvm_cpu_has_injectable_intr(vcpu)) {
9363 r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
9364 if (r < 0)
9365 goto out;
9366 if (r) {
9367 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
9368 static_call(kvm_x86_set_irq)(vcpu);
9369 WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
9370 }
9371 if (kvm_cpu_has_injectable_intr(vcpu))
9372 static_call(kvm_x86_enable_irq_window)(vcpu);
9373 }
9374
9375 if (is_guest_mode(vcpu) &&
9376 kvm_x86_ops.nested_ops->hv_timer_pending &&
9377 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
9378 *req_immediate_exit = true;
9379
9380 WARN_ON(vcpu->arch.exception.pending);
9381 return 0;
9382
9383out:
9384 if (r == -EBUSY) {
9385 *req_immediate_exit = true;
9386 r = 0;
9387 }
9388 return r;
9389}
9390
9391static void process_nmi(struct kvm_vcpu *vcpu)
9392{
9393 unsigned limit = 2;
9394
	/*
	 * x86 is limited to one NMI running, and one NMI pending after it.
	 * If an NMI is already in progress, limit further NMIs to just one.
	 * Otherwise, allow two (and we'll inject the first one immediately).
	 */
9400 if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
9401 limit = 1;
9402
9403 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
9404 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
9405 kvm_make_request(KVM_REQ_EVENT, vcpu);
9406}
9407
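/*
 * Pack a kvm_segment into the "access rights" layout used by the SMRAM
 * state-save area: G at bit 23, D/B at 22, L at 21, AVL at 20, P at 15,
 * DPL at bits 13-14, S at 12 and the segment type at bits 8-11.
 */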
9408static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
9409{
9410 u32 flags = 0;
9411 flags |= seg->g << 23;
9412 flags |= seg->db << 22;
9413 flags |= seg->l << 21;
9414 flags |= seg->avl << 20;
9415 flags |= seg->present << 15;
9416 flags |= seg->dpl << 13;
9417 flags |= seg->s << 12;
9418 flags |= seg->type << 8;
9419 return flags;
9420}
9421
9422static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
9423{
9424 struct kvm_segment seg;
9425 int offset;
9426
9427 kvm_get_segment(vcpu, &seg, n);
9428 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
9429
9430 if (n < 3)
9431 offset = 0x7f84 + n * 12;
9432 else
9433 offset = 0x7f2c + (n - 3) * 12;
9434
9435 put_smstate(u32, buf, offset + 8, seg.base);
9436 put_smstate(u32, buf, offset + 4, seg.limit);
9437 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
9438}
9439
9440#ifdef CONFIG_X86_64
9441static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
9442{
9443 struct kvm_segment seg;
9444 int offset;
9445 u16 flags;
9446
9447 kvm_get_segment(vcpu, &seg, n);
9448 offset = 0x7e00 + n * 16;
9449
9450 flags = enter_smm_get_segment_flags(&seg) >> 8;
9451 put_smstate(u16, buf, offset, seg.selector);
9452 put_smstate(u16, buf, offset + 2, flags);
9453 put_smstate(u32, buf, offset + 4, seg.limit);
9454 put_smstate(u64, buf, offset + 8, seg.base);
9455}
9456#endif
9457
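/*
 * Build the 32-bit SMRAM state-save image in 'buf'.  Offsets are relative to
 * SMBASE + 0x8000 and cover the control registers, RFLAGS, RIP, GPRs,
 * DR6/DR7, TR, LDTR, GDT/IDT, the data segments, the state-save revision id
 * and SMBASE itself.
 */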
9458static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
9459{
9460 struct desc_ptr dt;
9461 struct kvm_segment seg;
9462 unsigned long val;
9463 int i;
9464
9465 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
9466 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
9467 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
9468 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
9469
9470 for (i = 0; i < 8; i++)
9471 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
9472
9473 kvm_get_dr(vcpu, 6, &val);
9474 put_smstate(u32, buf, 0x7fcc, (u32)val);
9475 kvm_get_dr(vcpu, 7, &val);
9476 put_smstate(u32, buf, 0x7fc8, (u32)val);
9477
9478 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9479 put_smstate(u32, buf, 0x7fc4, seg.selector);
9480 put_smstate(u32, buf, 0x7f64, seg.base);
9481 put_smstate(u32, buf, 0x7f60, seg.limit);
9482 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
9483
9484 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9485 put_smstate(u32, buf, 0x7fc0, seg.selector);
9486 put_smstate(u32, buf, 0x7f80, seg.base);
9487 put_smstate(u32, buf, 0x7f7c, seg.limit);
9488 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
9489
9490 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9491 put_smstate(u32, buf, 0x7f74, dt.address);
9492 put_smstate(u32, buf, 0x7f70, dt.size);
9493
9494 static_call(kvm_x86_get_idt)(vcpu, &dt);
9495 put_smstate(u32, buf, 0x7f58, dt.address);
9496 put_smstate(u32, buf, 0x7f54, dt.size);
9497
9498 for (i = 0; i < 6; i++)
9499 enter_smm_save_seg_32(vcpu, buf, i);
9500
9501 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
9502
	/* revision id */
9504 put_smstate(u32, buf, 0x7efc, 0x00020000);
9505 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
9506}
9507
9508#ifdef CONFIG_X86_64
9509static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
9510{
9511 struct desc_ptr dt;
9512 struct kvm_segment seg;
9513 unsigned long val;
9514 int i;
9515
9516 for (i = 0; i < 16; i++)
9517 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
9518
9519 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
9520 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
9521
9522 kvm_get_dr(vcpu, 6, &val);
9523 put_smstate(u64, buf, 0x7f68, val);
9524 kvm_get_dr(vcpu, 7, &val);
9525 put_smstate(u64, buf, 0x7f60, val);
9526
9527 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
9528 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
9529 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
9530
9531 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
9532
	/* revision id */
9534 put_smstate(u32, buf, 0x7efc, 0x00020064);
9535
9536 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
9537
9538 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
9539 put_smstate(u16, buf, 0x7e90, seg.selector);
9540 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
9541 put_smstate(u32, buf, 0x7e94, seg.limit);
9542 put_smstate(u64, buf, 0x7e98, seg.base);
9543
9544 static_call(kvm_x86_get_idt)(vcpu, &dt);
9545 put_smstate(u32, buf, 0x7e84, dt.size);
9546 put_smstate(u64, buf, 0x7e88, dt.address);
9547
9548 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
9549 put_smstate(u16, buf, 0x7e70, seg.selector);
9550 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
9551 put_smstate(u32, buf, 0x7e74, seg.limit);
9552 put_smstate(u64, buf, 0x7e78, seg.base);
9553
9554 static_call(kvm_x86_get_gdt)(vcpu, &dt);
9555 put_smstate(u32, buf, 0x7e64, dt.size);
9556 put_smstate(u64, buf, 0x7e68, dt.address);
9557
9558 for (i = 0; i < 6; i++)
9559 enter_smm_save_seg_64(vcpu, buf, i);
9560}
9561#endif
9562
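/*
 * Transition the vCPU into System Management Mode: save the current state
 * into the SMRAM image, let vendor code make ISA-specific adjustments, then
 * reset the vCPU to the architectural SMM entry state (RIP 0x8000, CS base
 * at SMBASE, flat 4GiB segments, paging and protection disabled).
 */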
9563static void enter_smm(struct kvm_vcpu *vcpu)
9564{
9565 struct kvm_segment cs, ds;
9566 struct desc_ptr dt;
9567 unsigned long cr0;
9568 char buf[512];
9569
9570 memset(buf, 0, 512);
9571#ifdef CONFIG_X86_64
9572 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
9573 enter_smm_save_state_64(vcpu, buf);
9574 else
9575#endif
9576 enter_smm_save_state_32(vcpu, buf);
9577
	/*
	 * Give the vendor's enter_smm() callback a chance to make ISA-specific
	 * changes to the vCPU state (e.g. leave guest mode) after we've saved
	 * the state into the SMM state-save area.
	 */
9583 static_call(kvm_x86_enter_smm)(vcpu, buf);
9584
9585 kvm_smm_changed(vcpu, true);
9586 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
9587
9588 if (static_call(kvm_x86_get_nmi_mask)(vcpu))
9589 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
9590 else
9591 static_call(kvm_x86_set_nmi_mask)(vcpu, true);
9592
9593 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
9594 kvm_rip_write(vcpu, 0x8000);
9595
9596 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
9597 static_call(kvm_x86_set_cr0)(vcpu, cr0);
9598 vcpu->arch.cr0 = cr0;
9599
9600 static_call(kvm_x86_set_cr4)(vcpu, 0);
9601
	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
9603 dt.address = dt.size = 0;
9604 static_call(kvm_x86_set_idt)(vcpu, &dt);
9605
9606 kvm_set_dr(vcpu, 7, DR7_FIXED_1);
9607
9608 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
9609 cs.base = vcpu->arch.smbase;
9610
9611 ds.selector = 0;
9612 ds.base = 0;
9613
9614 cs.limit = ds.limit = 0xffffffff;
9615 cs.type = ds.type = 0x3;
9616 cs.dpl = ds.dpl = 0;
9617 cs.db = ds.db = 0;
9618 cs.s = ds.s = 1;
9619 cs.l = ds.l = 0;
9620 cs.g = ds.g = 1;
9621 cs.avl = ds.avl = 0;
9622 cs.present = ds.present = 1;
9623 cs.unusable = ds.unusable = 0;
9624 cs.padding = ds.padding = 0;
9625
9626 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
9627 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
9628 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
9629 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
9630 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
9631 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
9632
9633#ifdef CONFIG_X86_64
9634 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
9635 static_call(kvm_x86_set_efer)(vcpu, 0);
9636#endif
9637
9638 kvm_update_cpuid_runtime(vcpu);
9639 kvm_mmu_reset_context(vcpu);
9640}
9641
9642static void process_smi(struct kvm_vcpu *vcpu)
9643{
9644 vcpu->arch.smi_pending = true;
9645 kvm_make_request(KVM_REQ_EVENT, vcpu);
9646}
9647
9648void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
9649 unsigned long *vcpu_bitmap)
9650{
9651 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, vcpu_bitmap);
9652}
9653
9654void kvm_make_scan_ioapic_request(struct kvm *kvm)
9655{
9656 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
9657}
9658
9659void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
9660{
9661 bool activate;
9662
9663 if (!lapic_in_kernel(vcpu))
9664 return;
9665
9666 down_read(&vcpu->kvm->arch.apicv_update_lock);
9667
9668 activate = kvm_apicv_activated(vcpu->kvm);
9669 if (vcpu->arch.apicv_active == activate)
9670 goto out;
9671
9672 vcpu->arch.apicv_active = activate;
9673 kvm_apic_update_apicv(vcpu);
9674 static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
9675
	/*
	 * When APICv gets disabled, we may still have injected interrupts
	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
	 * still active when the interrupt got accepted. Make sure
	 * inject_pending_event() is called to check for that.
	 */
9682 if (!vcpu->arch.apicv_active)
9683 kvm_make_request(KVM_REQ_EVENT, vcpu);
9684
9685out:
9686 up_read(&vcpu->kvm->arch.apicv_update_lock);
9687}
9688EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
9689
9690void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
9691{
9692 unsigned long old, new;
9693
9694 lockdep_assert_held_write(&kvm->arch.apicv_update_lock);
9695
9696 if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
9697 !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
9698 return;
9699
9700 old = new = kvm->arch.apicv_inhibit_reasons;
9701
9702 if (activate)
9703 __clear_bit(bit, &new);
9704 else
9705 __set_bit(bit, &new);
9706
9707 if (!!old != !!new) {
9708 trace_kvm_apicv_update_request(activate, bit);
9709
		/*
		 * Kick all vCPUs before setting apicv_inhibit_reasons so that
		 * the consistency WARN in vcpu_enter_guest() does not trip on
		 * a stale value.  This task waits for every vCPU to ack the
		 * kick before updating apicv_inhibit_reasons, and all other
		 * updaters serialize on apicv_update_lock, so vCPUs cannot
		 * re-enter the guest without observing the new inhibit state.
		 */
9721 kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE);
9722 kvm->arch.apicv_inhibit_reasons = new;
9723 if (new) {
9724 unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE);
9725 kvm_zap_gfn_range(kvm, gfn, gfn+1);
9726 }
9727 } else
9728 kvm->arch.apicv_inhibit_reasons = new;
9729}
9730EXPORT_SYMBOL_GPL(__kvm_request_apicv_update);
9731
9732void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
9733{
9734 down_write(&kvm->arch.apicv_update_lock);
9735 __kvm_request_apicv_update(kvm, activate, bit);
9736 up_write(&kvm->arch.apicv_update_lock);
9737}
9738EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
9739
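/*
 * Recompute the set of IOAPIC-routed vectors that must trigger an EOI exit.
 * The resulting bitmap is either loaded right away or, while the vCPU is in
 * guest (nested) mode, deferred via load_eoi_exitmap_pending.
 */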
9740static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
9741{
9742 if (!kvm_apic_present(vcpu))
9743 return;
9744
9745 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
9746
9747 if (irqchip_split(vcpu->kvm))
9748 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
9749 else {
9750 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
9751 if (ioapic_in_kernel(vcpu->kvm))
9752 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
9753 }
9754
9755 if (is_guest_mode(vcpu))
9756 vcpu->arch.load_eoi_exitmap_pending = true;
9757 else
9758 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
9759}
9760
9761static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
9762{
9763 u64 eoi_exit_bitmap[4];
9764
9765 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
9766 return;
9767
9768 if (to_hv_vcpu(vcpu)) {
9769 bitmap_or((ulong *)eoi_exit_bitmap,
9770 vcpu->arch.ioapic_handled_vectors,
9771 to_hv_synic(vcpu)->vec_bitmap, 256);
9772 static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
9773 return;
9774 }
9775
9776 static_call(kvm_x86_load_eoi_exitmap)(
9777 vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
9778}
9779
9780void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
9781 unsigned long start, unsigned long end)
9782{
9783 unsigned long apic_address;
9784
	/*
	 * The physical address of the APIC access page is cached by vendor
	 * code (e.g. in the VMCS); request a reload on every vCPU when the
	 * backing host mapping is invalidated.
	 */
9789 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
9790 if (start <= apic_address && apic_address < end)
9791 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
9792}
9793
9794static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
9795{
9796 if (!lapic_in_kernel(vcpu))
9797 return;
9798
9799 if (!kvm_x86_ops.set_apic_access_page_addr)
9800 return;
9801
9802 static_call(kvm_x86_set_apic_access_page_addr)(vcpu);
9803}
9804
9805void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
9806{
9807 smp_send_reschedule(vcpu->cpu);
9808}
9809EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
9810
/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to the userspace.  Otherwise, the value will be returned to the
 * userspace.
 */
9817static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
9818{
9819 int r;
9820 bool req_int_win =
9821 dm_request_for_irq_injection(vcpu) &&
9822 kvm_cpu_accept_dm_intr(vcpu);
9823 fastpath_t exit_fastpath;
9824
9825 bool req_immediate_exit = false;
9826
9827
9828 if (unlikely(vcpu->kvm->dirty_ring_size &&
9829 kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
9830 vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
9831 trace_kvm_dirty_ring_exit(vcpu);
9832 r = 0;
9833 goto out;
9834 }
9835
9836 if (kvm_request_pending(vcpu)) {
9837 if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
9838 r = -EIO;
9839 goto out;
9840 }
9841 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
9842 if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
9843 r = 0;
9844 goto out;
9845 }
9846 }
9847 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
9848 kvm_mmu_unload(vcpu);
9849 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
9850 __kvm_migrate_timers(vcpu);
9851 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
9852 kvm_update_masterclock(vcpu->kvm);
9853 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
9854 kvm_gen_kvmclock_update(vcpu);
9855 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
9856 r = kvm_guest_time_update(vcpu);
9857 if (unlikely(r))
9858 goto out;
9859 }
9860 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
9861 kvm_mmu_sync_roots(vcpu);
9862 if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
9863 kvm_mmu_load_pgd(vcpu);
9864 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
9865 kvm_vcpu_flush_tlb_all(vcpu);
9866
			/* Flushing all ASIDs flushes the current ASID... */
9868 kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
9869 }
9870 kvm_service_local_tlb_flush_requests(vcpu);
9871
9872 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
9873 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
9874 r = 0;
9875 goto out;
9876 }
9877 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
9878 if (is_guest_mode(vcpu)) {
9879 kvm_x86_ops.nested_ops->triple_fault(vcpu);
9880 } else {
9881 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
9882 vcpu->mmio_needed = 0;
9883 r = 0;
9884 goto out;
9885 }
9886 }
9887 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* Page is swapped out. Do synthetic halt */
9889 vcpu->arch.apf.halted = true;
9890 r = 1;
9891 goto out;
9892 }
9893 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
9894 record_steal_time(vcpu);
9895 if (kvm_check_request(KVM_REQ_SMI, vcpu))
9896 process_smi(vcpu);
9897 if (kvm_check_request(KVM_REQ_NMI, vcpu))
9898 process_nmi(vcpu);
9899 if (kvm_check_request(KVM_REQ_PMU, vcpu))
9900 kvm_pmu_handle_event(vcpu);
9901 if (kvm_check_request(KVM_REQ_PMI, vcpu))
9902 kvm_pmu_deliver_pmi(vcpu);
9903 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
9904 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
9905 if (test_bit(vcpu->arch.pending_ioapic_eoi,
9906 vcpu->arch.ioapic_handled_vectors)) {
9907 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
9908 vcpu->run->eoi.vector =
9909 vcpu->arch.pending_ioapic_eoi;
9910 r = 0;
9911 goto out;
9912 }
9913 }
9914 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
9915 vcpu_scan_ioapic(vcpu);
9916 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
9917 vcpu_load_eoi_exitmap(vcpu);
9918 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
9919 kvm_vcpu_reload_apic_access_page(vcpu);
9920 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
9921 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
9922 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
9923 r = 0;
9924 goto out;
9925 }
9926 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
9927 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
9928 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
9929 r = 0;
9930 goto out;
9931 }
9932 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
9933 struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
9934
9935 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
9936 vcpu->run->hyperv = hv_vcpu->exit;
9937 r = 0;
9938 goto out;
9939 }
9940
		/*
		 * KVM_REQ_HV_STIMER has to be processed after
		 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
		 * depend on the guest clock being up-to-date
		 */
9946 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
9947 kvm_hv_process_stimers(vcpu);
9948 if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
9949 kvm_vcpu_update_apicv(vcpu);
9950 if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
9951 kvm_check_async_pf_completion(vcpu);
9952 if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
9953 static_call(kvm_x86_msr_filter_changed)(vcpu);
9954
9955 if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
9956 static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
9957 }
9958
9959 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
9960 kvm_xen_has_interrupt(vcpu)) {
9961 ++vcpu->stat.req_event;
9962 r = kvm_apic_accept_events(vcpu);
9963 if (r < 0) {
9964 r = 0;
9965 goto out;
9966 }
9967 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
9968 r = 1;
9969 goto out;
9970 }
9971
9972 r = inject_pending_event(vcpu, &req_immediate_exit);
9973 if (r < 0) {
9974 r = 0;
9975 goto out;
9976 }
9977 if (req_int_win)
9978 static_call(kvm_x86_enable_irq_window)(vcpu);
9979
9980 if (kvm_lapic_enabled(vcpu)) {
9981 update_cr8_intercept(vcpu);
9982 kvm_lapic_sync_to_vapic(vcpu);
9983 }
9984 }
9985
9986 r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		goto cancel_injection;
9990
9991 preempt_disable();
9992
9993 static_call(kvm_x86_prepare_guest_switch)(vcpu);
9994
	/*
	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
	 * IPIs are then delayed after guest entry, which ensures that they
	 * result in virtual interrupt delivery.
	 */
10000 local_irq_disable();
10001
	/* Store vcpu->apicv_active before vcpu->mode.  */
10003 smp_store_release(&vcpu->mode, IN_GUEST_MODE);
10004
10005 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
10006
	/*
	 * 1) We should set ->mode before checking ->requests.  Please see
	 * the comment in kvm_vcpu_exiting_guest_mode().
	 *
	 * 2) For APICv, we should set ->mode before checking PID.ON. This
	 * pairs with the memory barrier implicit in pi_test_and_set_on
	 * (see vmx_deliver_posted_interrupt).
	 *
	 * 3) This also orders the write to mode from any reads to the page
	 * tables done while the VCPU is running.  Please see the comment
	 * in kvm_flush_remote_tlbs.
	 */
10019 smp_mb__after_srcu_read_unlock();
10020
	/*
	 * Process pending posted interrupts to handle the case where the
	 * notification IRQ arrived in the host, or was never sent (because the
	 * target vCPU wasn't running).  Do this regardless of the vCPU's APICv
	 * status, as assigned devices can post interrupts even when APICv is
	 * temporarily inhibited.
	 */
10028 if (kvm_lapic_enabled(vcpu))
10029 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
10030
10031 if (kvm_vcpu_exit_request(vcpu)) {
10032 vcpu->mode = OUTSIDE_GUEST_MODE;
10033 smp_wmb();
10034 local_irq_enable();
10035 preempt_enable();
10036 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
10037 r = 1;
10038 goto cancel_injection;
10039 }
10040
10041 if (req_immediate_exit) {
10042 kvm_make_request(KVM_REQ_EVENT, vcpu);
10043 static_call(kvm_x86_request_immediate_exit)(vcpu);
10044 }
10045
10046 fpregs_assert_state_consistent();
10047 if (test_thread_flag(TIF_NEED_FPU_LOAD))
10048 switch_fpu_return();
10049
10050 if (vcpu->arch.guest_fpu.xfd_err)
10051 wrmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
10052
10053 if (unlikely(vcpu->arch.switch_db_regs)) {
10054 set_debugreg(0, 7);
10055 set_debugreg(vcpu->arch.eff_db[0], 0);
10056 set_debugreg(vcpu->arch.eff_db[1], 1);
10057 set_debugreg(vcpu->arch.eff_db[2], 2);
10058 set_debugreg(vcpu->arch.eff_db[3], 3);
10059 } else if (unlikely(hw_breakpoint_active())) {
10060 set_debugreg(0, 7);
10061 }
10062
10063 guest_timing_enter_irqoff();
10064
10065 for (;;) {
		/*
		 * Assert that vCPU vs. VM APICv state is consistent.  An APICv
		 * update must kick and wait for all vCPUs before toggling the
		 * per-VM state, and responding vCPUs must wait for the update
		 * to complete before servicing KVM_REQ_APICV_UPDATE.
		 */
10072 WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
10073
10074 exit_fastpath = static_call(kvm_x86_run)(vcpu);
10075 if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
10076 break;
10077
10078 if (kvm_lapic_enabled(vcpu))
10079 static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
10080
10081 if (unlikely(kvm_vcpu_exit_request(vcpu))) {
10082 exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
10083 break;
10084 }
10085 }
10086
	/*
	 * Do this here before restoring debug registers on the host.  And
	 * since we do this before handling the vmexit, a DR access vmexit
	 * can (a) read the correct value of the debug registers, (b) set
	 * KVM_DEBUGREG_WONT_EXIT again.
	 */
10093 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
10094 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
10095 static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
10096 kvm_update_dr0123(vcpu);
10097 kvm_update_dr7(vcpu);
10098 }
10099
	/*
	 * If the guest has used debug registers, at least dr7
	 * will be disabled while returning to the host.
	 * If we don't have active breakpoints in the host, we don't
	 * care about the messed up debug address registers. But if
	 * we have some of them active, restore the old state.
	 */
10107 if (hw_breakpoint_active())
10108 hw_breakpoint_restore();
10109
10110 vcpu->arch.last_vmentry_cpu = vcpu->cpu;
10111 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
10112
10113 vcpu->mode = OUTSIDE_GUEST_MODE;
10114 smp_wmb();
10115
	/*
	 * Sync xfd before calling handle_exit_irqoff() which may
	 * rely on the fact that guest_fpu::xfd is up-to-date (e.g.
	 * in #NM irqoff handler).
	 */
10121 if (vcpu->arch.xfd_no_write_intercept)
10122 fpu_sync_guest_vmexit_xfd_state();
10123
10124 static_call(kvm_x86_handle_exit_irqoff)(vcpu);
10125
10126 if (vcpu->arch.guest_fpu.xfd_err)
10127 wrmsrl(MSR_IA32_XFD_ERR, 0);
10128
	/*
	 * Consume any pending interrupts, including the possible source of
	 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
	 * An instruction is required after local_irq_enable() to fully unblock
	 * interrupts on processors that implement an interrupt shadow, the
	 * stat.exits increment will do nicely.
	 */
10136 kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
10137 local_irq_enable();
10138 ++vcpu->stat.exits;
10139 local_irq_disable();
10140 kvm_after_interrupt(vcpu);
10141
	/*
	 * Wait until after servicing IRQs to account guest time so that any
	 * ticks that occurred while running the guest are properly accounted
	 * to the guest.  Waiting until IRQs are enabled degrades the accuracy
	 * of accounting via context tracking, but the loss of accuracy is
	 * acceptable for all known use cases.
	 */
10149 guest_timing_exit_irqoff();
10150
10151 if (lapic_in_kernel(vcpu)) {
10152 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
10153 if (delta != S64_MIN) {
10154 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
10155 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
10156 }
10157 }
10158
10159 local_irq_enable();
10160 preempt_enable();
10161
10162 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
10163
	/*
	 * Profile KVM exit RIPs:
	 */
10167 if (unlikely(prof_on == KVM_PROFILING)) {
10168 unsigned long rip = kvm_rip_read(vcpu);
10169 profile_hit(KVM_PROFILING, (void *)rip);
10170 }
10171
10172 if (unlikely(vcpu->arch.tsc_always_catchup))
10173 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
10174
10175 if (vcpu->arch.apic_attention)
10176 kvm_lapic_sync_from_vapic(vcpu);
10177
10178 r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
10179 return r;
10180
10181cancel_injection:
10182 if (req_immediate_exit)
10183 kvm_make_request(KVM_REQ_EVENT, vcpu);
10184 static_call(kvm_x86_cancel_injection)(vcpu);
10185 if (unlikely(vcpu->arch.apic_attention))
10186 kvm_lapic_sync_from_vapic(vcpu);
10187out:
10188 return r;
10189}
10190
10191
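/*
 * Block the vCPU until it becomes runnable again.  Returns 1 if vcpu_run()
 * should keep iterating, and 0 or a negative value if control must return to
 * userspace.
 */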
10192static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
10193{
10194 bool hv_timer;
10195
10196 if (!kvm_arch_vcpu_runnable(vcpu)) {
		/*
		 * Switch to the software timer before halt-polling/blocking as
		 * the guest's timer may be a break event for the vCPU, and the
		 * hypervisor timer runs only when the CPU is in guest mode.
		 * Switch before halt-polling so that KVM recognizes an expired
		 * timer before blocking.
		 */
10204 hv_timer = kvm_lapic_hv_timer_in_use(vcpu);
10205 if (hv_timer)
10206 kvm_lapic_switch_to_sw_timer(vcpu);
10207
10208 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
10209 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
10210 kvm_vcpu_halt(vcpu);
10211 else
10212 kvm_vcpu_block(vcpu);
10213 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
10214
10215 if (hv_timer)
10216 kvm_lapic_switch_to_hv_timer(vcpu);
10217
10218 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
10219 return 1;
10220 }
10221
10222 if (kvm_apic_accept_events(vcpu) < 0)
10223 return 0;
10224 switch(vcpu->arch.mp_state) {
10225 case KVM_MP_STATE_HALTED:
10226 case KVM_MP_STATE_AP_RESET_HOLD:
10227 vcpu->arch.pv.pv_unhalted = false;
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10230 fallthrough;
10231 case KVM_MP_STATE_RUNNABLE:
10232 vcpu->arch.apf.halted = false;
10233 break;
10234 case KVM_MP_STATE_INIT_RECEIVED:
10235 break;
10236 default:
10237 return -EINTR;
10238 }
10239 return 1;
10240}
10241
10242static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
10243{
10244 if (is_guest_mode(vcpu))
10245 kvm_check_nested_events(vcpu);
10246
10247 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
10248 !vcpu->arch.apf.halted);
10249}
10250
10251
10252static int vcpu_run(struct kvm_vcpu *vcpu)
10253{
10254 int r;
10255 struct kvm *kvm = vcpu->kvm;
10256
10257 vcpu->arch.l1tf_flush_l1d = true;
10258
10259 for (;;) {
10260 if (kvm_vcpu_running(vcpu)) {
10261 r = vcpu_enter_guest(vcpu);
10262 } else {
10263 r = vcpu_block(kvm, vcpu);
10264 }
10265
10266 if (r <= 0)
10267 break;
10268
10269 kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
10270 if (kvm_cpu_has_pending_timer(vcpu))
10271 kvm_inject_pending_timer_irqs(vcpu);
10272
10273 if (dm_request_for_irq_injection(vcpu) &&
10274 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
10275 r = 0;
10276 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
10277 ++vcpu->stat.request_irq_exits;
10278 break;
10279 }
10280
10281 if (__xfer_to_guest_mode_work_pending()) {
10282 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
10283 r = xfer_to_guest_mode_handle_work(vcpu);
10284 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
10285 if (r)
10286 return r;
10287 }
10288 }
10289
10290 return r;
10291}
10292
10293static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
10294{
10295 int r;
10296
10297 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
10298 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
10299 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
10300 return r;
10301}
10302
10303static int complete_emulated_pio(struct kvm_vcpu *vcpu)
10304{
10305 BUG_ON(!vcpu->arch.pio.count);
10306
10307 return complete_emulated_io(vcpu);
10308}
10309
/*
 * Implements the following, as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit
 */
10328static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
10329{
10330 struct kvm_run *run = vcpu->run;
10331 struct kvm_mmio_fragment *frag;
10332 unsigned len;
10333
10334 BUG_ON(!vcpu->mmio_needed);
10335
	/* Complete previous fragment */
10337 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
10338 len = min(8u, frag->len);
10339 if (!vcpu->mmio_is_write)
10340 memcpy(frag->data, run->mmio.data, len);
10341
10342 if (frag->len <= 8) {
		/* Switch to the next fragment. */
10344 frag++;
10345 vcpu->mmio_cur_fragment++;
10346 } else {
		/* Go forward to the next mmio piece. */
10348 frag->data += len;
10349 frag->gpa += len;
10350 frag->len -= len;
10351 }
10352
10353 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
10354 vcpu->mmio_needed = 0;
10355
10356
10357 if (vcpu->mmio_is_write)
10358 return 1;
10359 vcpu->mmio_read_completed = 1;
10360 return complete_emulated_io(vcpu);
10361 }
10362
10363 run->exit_reason = KVM_EXIT_MMIO;
10364 run->mmio.phys_addr = frag->gpa;
10365 if (vcpu->mmio_is_write)
10366 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
10367 run->mmio.len = min(8u, frag->len);
10368 run->mmio.is_write = vcpu->mmio_is_write;
10369 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
10370 return 0;
10371}
10372
/* Swap the userspace FPU context for the guest FPU context. */
10374static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
10375{
	/*
	 * Exclude PKRU from restore as restored separately in
	 * kvm_x86_ops.run().
	 */
10380 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
10381 trace_kvm_fpu(1);
10382}
10383
/* When vcpu_run ends, restore user space FPU context. */
10385static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
10386{
10387 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
10388 ++vcpu->stat.fpu_reload;
10389 trace_kvm_fpu(0);
10390}
10391
10392int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
10393{
10394 struct kvm_run *kvm_run = vcpu->run;
10395 struct kvm *kvm = vcpu->kvm;
10396 int r;
10397
10398 vcpu_load(vcpu);
10399 kvm_sigset_activate(vcpu);
10400 kvm_run->flags = 0;
10401 kvm_load_guest_fpu(vcpu);
10402
10403 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
10404 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
10405 if (kvm_run->immediate_exit) {
10406 r = -EINTR;
10407 goto out;
10408 }
		/*
		 * It should be impossible for the hypervisor timer to be in
		 * use before KVM has ever run the vCPU.
		 */
10413 WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
10414
10415 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
10416 kvm_vcpu_block(vcpu);
10417 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
10418
10419 if (kvm_apic_accept_events(vcpu) < 0) {
10420 r = 0;
10421 goto out;
10422 }
10423 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
10424 r = -EAGAIN;
10425 if (signal_pending(current)) {
10426 r = -EINTR;
10427 kvm_run->exit_reason = KVM_EXIT_INTR;
10428 ++vcpu->stat.signal_exits;
10429 }
10430 goto out;
10431 }
10432
10433 if ((kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) ||
10434 (kvm_run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)) {
10435 r = -EINVAL;
10436 goto out;
10437 }
10438
10439 if (kvm_run->kvm_dirty_regs) {
10440 r = sync_regs(vcpu);
10441 if (r != 0)
10442 goto out;
10443 }
10444
	/* re-sync apic's tpr */
10446 if (!lapic_in_kernel(vcpu)) {
10447 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
10448 r = -EINVAL;
10449 goto out;
10450 }
10451 }
10452
10453 if (unlikely(vcpu->arch.complete_userspace_io)) {
10454 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
10455 vcpu->arch.complete_userspace_io = NULL;
10456 r = cui(vcpu);
10457 if (r <= 0)
10458 goto out;
10459 } else
10460 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
10461
10462 if (kvm_run->immediate_exit) {
10463 r = -EINTR;
10464 goto out;
10465 }
10466
10467 r = static_call(kvm_x86_vcpu_pre_run)(vcpu);
10468 if (r <= 0)
10469 goto out;
10470
10471 r = vcpu_run(vcpu);
10472
10473out:
10474 kvm_put_guest_fpu(vcpu);
10475 if (kvm_run->kvm_valid_regs)
10476 store_regs(vcpu);
10477 post_kvm_run_save(vcpu);
10478 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
10479
10480 kvm_sigset_deactivate(vcpu);
10481 vcpu_put(vcpu);
10482 return r;
10483}
10484
10485static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10486{
10487 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * We are here if userspace calls get_regs() in the middle of
		 * instruction emulation. Registers state needs to be copied
		 * back from emulation context to vcpu. Userspace shouldn't do
		 * that usually, but some badly designed PV devices (vmware
		 * backdoor interface) need this to work
		 */
10495 emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
10496 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10497 }
10498 regs->rax = kvm_rax_read(vcpu);
10499 regs->rbx = kvm_rbx_read(vcpu);
10500 regs->rcx = kvm_rcx_read(vcpu);
10501 regs->rdx = kvm_rdx_read(vcpu);
10502 regs->rsi = kvm_rsi_read(vcpu);
10503 regs->rdi = kvm_rdi_read(vcpu);
10504 regs->rsp = kvm_rsp_read(vcpu);
10505 regs->rbp = kvm_rbp_read(vcpu);
10506#ifdef CONFIG_X86_64
10507 regs->r8 = kvm_r8_read(vcpu);
10508 regs->r9 = kvm_r9_read(vcpu);
10509 regs->r10 = kvm_r10_read(vcpu);
10510 regs->r11 = kvm_r11_read(vcpu);
10511 regs->r12 = kvm_r12_read(vcpu);
10512 regs->r13 = kvm_r13_read(vcpu);
10513 regs->r14 = kvm_r14_read(vcpu);
10514 regs->r15 = kvm_r15_read(vcpu);
10515#endif
10516
10517 regs->rip = kvm_rip_read(vcpu);
10518 regs->rflags = kvm_get_rflags(vcpu);
10519}
10520
10521int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10522{
10523 vcpu_load(vcpu);
10524 __get_regs(vcpu, regs);
10525 vcpu_put(vcpu);
10526 return 0;
10527}
10528
10529static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10530{
10531 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
10532 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
10533
10534 kvm_rax_write(vcpu, regs->rax);
10535 kvm_rbx_write(vcpu, regs->rbx);
10536 kvm_rcx_write(vcpu, regs->rcx);
10537 kvm_rdx_write(vcpu, regs->rdx);
10538 kvm_rsi_write(vcpu, regs->rsi);
10539 kvm_rdi_write(vcpu, regs->rdi);
10540 kvm_rsp_write(vcpu, regs->rsp);
10541 kvm_rbp_write(vcpu, regs->rbp);
10542#ifdef CONFIG_X86_64
10543 kvm_r8_write(vcpu, regs->r8);
10544 kvm_r9_write(vcpu, regs->r9);
10545 kvm_r10_write(vcpu, regs->r10);
10546 kvm_r11_write(vcpu, regs->r11);
10547 kvm_r12_write(vcpu, regs->r12);
10548 kvm_r13_write(vcpu, regs->r13);
10549 kvm_r14_write(vcpu, regs->r14);
10550 kvm_r15_write(vcpu, regs->r15);
10551#endif
10552
10553 kvm_rip_write(vcpu, regs->rip);
10554 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
10555
10556 vcpu->arch.exception.pending = false;
10557
10558 kvm_make_request(KVM_REQ_EVENT, vcpu);
10559}
10560
10561int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
10562{
10563 vcpu_load(vcpu);
10564 __set_regs(vcpu, regs);
10565 vcpu_put(vcpu);
10566 return 0;
10567}
10568
10569void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
10570{
10571 struct kvm_segment cs;
10572
10573 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
10574 *db = cs.db;
10575 *l = cs.l;
10576}
10577EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
10578
10579static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10580{
10581 struct desc_ptr dt;
10582
10583 if (vcpu->arch.guest_state_protected)
10584 goto skip_protected_regs;
10585
10586 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
10587 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
10588 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
10589 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
10590 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
10591 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
10592
10593 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
10594 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
10595
10596 static_call(kvm_x86_get_idt)(vcpu, &dt);
10597 sregs->idt.limit = dt.size;
10598 sregs->idt.base = dt.address;
10599 static_call(kvm_x86_get_gdt)(vcpu, &dt);
10600 sregs->gdt.limit = dt.size;
10601 sregs->gdt.base = dt.address;
10602
10603 sregs->cr2 = vcpu->arch.cr2;
10604 sregs->cr3 = kvm_read_cr3(vcpu);
10605
10606skip_protected_regs:
10607 sregs->cr0 = kvm_read_cr0(vcpu);
10608 sregs->cr4 = kvm_read_cr4(vcpu);
10609 sregs->cr8 = kvm_get_cr8(vcpu);
10610 sregs->efer = vcpu->arch.efer;
10611 sregs->apic_base = kvm_get_apic_base(vcpu);
10612}
10613
10614static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10615{
10616 __get_sregs_common(vcpu, sregs);
10617
10618 if (vcpu->arch.guest_state_protected)
10619 return;
10620
10621 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
10622 set_bit(vcpu->arch.interrupt.nr,
10623 (unsigned long *)sregs->interrupt_bitmap);
10624}
10625
10626static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
10627{
10628 int i;
10629
10630 __get_sregs_common(vcpu, (struct kvm_sregs *)sregs2);
10631
10632 if (vcpu->arch.guest_state_protected)
10633 return;
10634
10635 if (is_pae_paging(vcpu)) {
10636 for (i = 0 ; i < 4 ; i++)
10637 sregs2->pdptrs[i] = kvm_pdptr_read(vcpu, i);
10638 sregs2->flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
10639 }
10640}
10641
10642int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
10643 struct kvm_sregs *sregs)
10644{
10645 vcpu_load(vcpu);
10646 __get_sregs(vcpu, sregs);
10647 vcpu_put(vcpu);
10648 return 0;
10649}
10650
10651int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
10652 struct kvm_mp_state *mp_state)
10653{
10654 int r;
10655
10656 vcpu_load(vcpu);
10657 if (kvm_mpx_supported())
10658 kvm_load_guest_fpu(vcpu);
10659
10660 r = kvm_apic_accept_events(vcpu);
10661 if (r < 0)
10662 goto out;
10663 r = 0;
10664
10665 if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
10666 vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
10667 vcpu->arch.pv.pv_unhalted)
10668 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
10669 else
10670 mp_state->mp_state = vcpu->arch.mp_state;
10671
10672out:
10673 if (kvm_mpx_supported())
10674 kvm_put_guest_fpu(vcpu);
10675 vcpu_put(vcpu);
10676 return r;
10677}
10678
10679int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
10680 struct kvm_mp_state *mp_state)
10681{
10682 int ret = -EINVAL;
10683
10684 vcpu_load(vcpu);
10685
10686 if (!lapic_in_kernel(vcpu) &&
10687 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
10688 goto out;
10689
	/*
	 * KVM_MP_STATE_INIT_RECEIVED means the processor is in
	 * INIT state; latched init should be reported using
	 * KVM_SET_VCPU_EVENTS, so reject it here.
	 */
10695 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
10696 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
10697 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
10698 goto out;
10699
10700 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
10701 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
10702 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
10703 } else
10704 vcpu->arch.mp_state = mp_state->mp_state;
10705 kvm_make_request(KVM_REQ_EVENT, vcpu);
10706
10707 ret = 0;
10708out:
10709 vcpu_put(vcpu);
10710 return ret;
10711}
10712
10713int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
10714 int reason, bool has_error_code, u32 error_code)
10715{
10716 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
10717 int ret;
10718
10719 init_emulate_ctxt(vcpu);
10720
10721 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
10722 has_error_code, error_code);
10723 if (ret) {
10724 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
10725 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
10726 vcpu->run->internal.ndata = 0;
10727 return 0;
10728 }
10729
10730 kvm_rip_write(vcpu, ctxt->eip);
10731 kvm_set_rflags(vcpu, ctxt->eflags);
10732 return 1;
10733}
10734EXPORT_SYMBOL_GPL(kvm_task_switch);
10735
10736static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10737{
10738 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
		/*
		 * When EFER.LME and CR0.PG are set, the processor is in
		 * 64-bit mode (though maybe in a 32-bit code segment).
		 * CR4.PAE and EFER.LMA must be set.
		 */
10744 if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
10745 return false;
10746 if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
10747 return false;
10748 } else {
		/*
		 * Not in 64-bit mode: EFER.LMA is clear and the code
		 * segment cannot be 64-bit.
		 */
10753 if (sregs->efer & EFER_LMA || sregs->cs.l)
10754 return false;
10755 }
10756
10757 return kvm_is_valid_cr4(vcpu, sregs->cr4);
10758}
10759
10760static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
10761 int *mmu_reset_needed, bool update_pdptrs)
10762{
10763 struct msr_data apic_base_msr;
10764 int idx;
10765 struct desc_ptr dt;
10766
10767 if (!kvm_is_valid_sregs(vcpu, sregs))
10768 return -EINVAL;
10769
10770 apic_base_msr.data = sregs->apic_base;
10771 apic_base_msr.host_initiated = true;
10772 if (kvm_set_apic_base(vcpu, &apic_base_msr))
10773 return -EINVAL;
10774
10775 if (vcpu->arch.guest_state_protected)
10776 return 0;
10777
10778 dt.size = sregs->idt.limit;
10779 dt.address = sregs->idt.base;
10780 static_call(kvm_x86_set_idt)(vcpu, &dt);
10781 dt.size = sregs->gdt.limit;
10782 dt.address = sregs->gdt.base;
10783 static_call(kvm_x86_set_gdt)(vcpu, &dt);
10784
10785 vcpu->arch.cr2 = sregs->cr2;
10786 *mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
10787 vcpu->arch.cr3 = sregs->cr3;
10788 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
10789 static_call_cond(kvm_x86_post_set_cr3)(vcpu, sregs->cr3);
10790
10791 kvm_set_cr8(vcpu, sregs->cr8);
10792
10793 *mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
10794 static_call(kvm_x86_set_efer)(vcpu, sregs->efer);
10795
10796 *mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
10797 static_call(kvm_x86_set_cr0)(vcpu, sregs->cr0);
10798 vcpu->arch.cr0 = sregs->cr0;
10799
10800 *mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
10801 static_call(kvm_x86_set_cr4)(vcpu, sregs->cr4);
10802
10803 if (update_pdptrs) {
10804 idx = srcu_read_lock(&vcpu->kvm->srcu);
10805 if (is_pae_paging(vcpu)) {
10806 load_pdptrs(vcpu, kvm_read_cr3(vcpu));
10807 *mmu_reset_needed = 1;
10808 }
10809 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10810 }
10811
10812 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
10813 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
10814 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
10815 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
10816 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
10817 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
10818
10819 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
10820 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
10821
10822 update_cr8_intercept(vcpu);
10823
	/* Older userspace won't unhalt the vcpu on reset. */
10825 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
10826 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
10827 !is_protmode(vcpu))
10828 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10829
10830 return 0;
10831}
10832
10833static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
10834{
10835 int pending_vec, max_bits;
10836 int mmu_reset_needed = 0;
10837 int ret = __set_sregs_common(vcpu, sregs, &mmu_reset_needed, true);
10838
10839 if (ret)
10840 return ret;
10841
10842 if (mmu_reset_needed)
10843 kvm_mmu_reset_context(vcpu);
10844
10845 max_bits = KVM_NR_INTERRUPTS;
10846 pending_vec = find_first_bit(
10847 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
10848
10849 if (pending_vec < max_bits) {
10850 kvm_queue_interrupt(vcpu, pending_vec, false);
10851 pr_debug("Set back pending irq %d\n", pending_vec);
10852 kvm_make_request(KVM_REQ_EVENT, vcpu);
10853 }
10854 return 0;
10855}
10856
10857static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
10858{
10859 int mmu_reset_needed = 0;
10860 bool valid_pdptrs = sregs2->flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
10861 bool pae = (sregs2->cr0 & X86_CR0_PG) && (sregs2->cr4 & X86_CR4_PAE) &&
10862 !(sregs2->efer & EFER_LMA);
10863 int i, ret;
10864
10865 if (sregs2->flags & ~KVM_SREGS2_FLAGS_PDPTRS_VALID)
10866 return -EINVAL;
10867
10868 if (valid_pdptrs && (!pae || vcpu->arch.guest_state_protected))
10869 return -EINVAL;
10870
10871 ret = __set_sregs_common(vcpu, (struct kvm_sregs *)sregs2,
10872 &mmu_reset_needed, !valid_pdptrs);
10873 if (ret)
10874 return ret;
10875
10876 if (valid_pdptrs) {
10877 for (i = 0; i < 4 ; i++)
10878 kvm_pdptr_write(vcpu, i, sregs2->pdptrs[i]);
10879
10880 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
10881 mmu_reset_needed = 1;
10882 vcpu->arch.pdptrs_from_userspace = true;
10883 }
10884 if (mmu_reset_needed)
10885 kvm_mmu_reset_context(vcpu);
10886 return 0;
10887}
10888
10889int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
10890 struct kvm_sregs *sregs)
10891{
10892 int ret;
10893
10894 vcpu_load(vcpu);
10895 ret = __set_sregs(vcpu, sregs);
10896 vcpu_put(vcpu);
10897 return ret;
10898}
10899
10900static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
10901{
10902 bool inhibit = false;
10903 struct kvm_vcpu *vcpu;
10904 unsigned long i;
10905
10906 down_write(&kvm->arch.apicv_update_lock);
10907
10908 kvm_for_each_vcpu(i, vcpu, kvm) {
10909 if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
10910 inhibit = true;
10911 break;
10912 }
10913 }
10914 __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
10915 up_write(&kvm->arch.apicv_update_lock);
10916}
10917
10918int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
10919 struct kvm_guest_debug *dbg)
10920{
10921 unsigned long rflags;
10922 int i, r;
10923
10924 if (vcpu->arch.guest_state_protected)
10925 return -EINVAL;
10926
10927 vcpu_load(vcpu);
10928
10929 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
10930 r = -EBUSY;
10931 if (vcpu->arch.exception.pending)
10932 goto out;
10933 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
10934 kvm_queue_exception(vcpu, DB_VECTOR);
10935 else
10936 kvm_queue_exception(vcpu, BP_VECTOR);
10937 }
10938
	/*
	 * Read rflags as long as potentially injected trace flags are still
	 * filtered out.
	 */
10943 rflags = kvm_get_rflags(vcpu);
10944
10945 vcpu->guest_debug = dbg->control;
10946 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
10947 vcpu->guest_debug = 0;
10948
10949 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
10950 for (i = 0; i < KVM_NR_DB_REGS; ++i)
10951 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
10952 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
10953 } else {
10954 for (i = 0; i < KVM_NR_DB_REGS; i++)
10955 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
10956 }
10957 kvm_update_dr7(vcpu);
10958
10959 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10960 vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
10961
	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags.
	 */
10966 kvm_set_rflags(vcpu, rflags);
10967
10968 static_call(kvm_x86_update_exception_bitmap)(vcpu);
10969
10970 kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
10971
10972 r = 0;
10973
10974out:
10975 vcpu_put(vcpu);
10976 return r;
10977}
10978
/*
 * Translate a guest virtual address to a guest physical address.
 */
10982int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
10983 struct kvm_translation *tr)
10984{
10985 unsigned long vaddr = tr->linear_address;
10986 gpa_t gpa;
10987 int idx;
10988
10989 vcpu_load(vcpu);
10990
10991 idx = srcu_read_lock(&vcpu->kvm->srcu);
10992 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
10993 srcu_read_unlock(&vcpu->kvm->srcu, idx);
10994 tr->physical_address = gpa;
10995 tr->valid = gpa != UNMAPPED_GVA;
10996 tr->writeable = 1;
10997 tr->usermode = 0;
10998
10999 vcpu_put(vcpu);
11000 return 0;
11001}
11002
11003int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
11004{
11005 struct fxregs_state *fxsave;
11006
11007 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
11008 return 0;
11009
11010 vcpu_load(vcpu);
11011
11012 fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
11013 memcpy(fpu->fpr, fxsave->st_space, 128);
11014 fpu->fcw = fxsave->cwd;
11015 fpu->fsw = fxsave->swd;
11016 fpu->ftwx = fxsave->twd;
11017 fpu->last_opcode = fxsave->fop;
11018 fpu->last_ip = fxsave->rip;
11019 fpu->last_dp = fxsave->rdp;
11020 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
11021
11022 vcpu_put(vcpu);
11023 return 0;
11024}
11025
11026int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
11027{
11028 struct fxregs_state *fxsave;
11029
11030 if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
11031 return 0;
11032
11033 vcpu_load(vcpu);
11034
11035 fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
11036
11037 memcpy(fxsave->st_space, fpu->fpr, 128);
11038 fxsave->cwd = fpu->fcw;
11039 fxsave->swd = fpu->fsw;
11040 fxsave->twd = fpu->ftwx;
11041 fxsave->fop = fpu->last_opcode;
11042 fxsave->rip = fpu->last_ip;
11043 fxsave->rdp = fpu->last_dp;
11044 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
11045
11046 vcpu_put(vcpu);
11047 return 0;
11048}
11049
11050static void store_regs(struct kvm_vcpu *vcpu)
11051{
11052 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
11053
11054 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
11055 __get_regs(vcpu, &vcpu->run->s.regs.regs);
11056
11057 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
11058 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
11059
11060 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
11061 kvm_vcpu_ioctl_x86_get_vcpu_events(
11062 vcpu, &vcpu->run->s.regs.events);
11063}
11064
11065static int sync_regs(struct kvm_vcpu *vcpu)
11066{
11067 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
11068 __set_regs(vcpu, &vcpu->run->s.regs.regs);
11069 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
11070 }
11071 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
11072 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
11073 return -EINVAL;
11074 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
11075 }
11076 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
11077 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
11078 vcpu, &vcpu->run->s.regs.events))
11079 return -EINVAL;
11080 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
11081 }
11082
11083 return 0;
11084}
11085
11086int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
11087{
11088 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
		pr_warn_once("kvm: SMP vm created on host with unstable TSC; guest TSC will not be reliable\n");
11091
11092 return 0;
11093}
11094
11095int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
11096{
11097 struct page *page;
11098 int r;
11099
11100 vcpu->arch.last_vmentry_cpu = -1;
11101 vcpu->arch.regs_avail = ~0;
11102 vcpu->arch.regs_dirty = ~0;
11103
11104 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
11105 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
11106 else
11107 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
11108
11109 r = kvm_mmu_create(vcpu);
11110 if (r < 0)
11111 return r;
11112
11113 if (irqchip_in_kernel(vcpu->kvm)) {
11114 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
11115 if (r < 0)
11116 goto fail_mmu_destroy;
11117 if (kvm_apicv_activated(vcpu->kvm))
11118 vcpu->arch.apicv_active = true;
11119 } else
11120 static_branch_inc(&kvm_has_noapic_vcpu);
11121
11122 r = -ENOMEM;
11123
11124 page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
11125 if (!page)
11126 goto fail_free_lapic;
11127 vcpu->arch.pio_data = page_address(page);
11128
11129 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
11130 GFP_KERNEL_ACCOUNT);
11131 if (!vcpu->arch.mce_banks)
11132 goto fail_free_pio_data;
11133 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
11134
11135 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
11136 GFP_KERNEL_ACCOUNT))
11137 goto fail_free_mce_banks;
11138
11139 if (!alloc_emulate_ctxt(vcpu))
11140 goto free_wbinvd_dirty_mask;
11141
11142 if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
11143 pr_err("kvm: failed to allocate vcpu's fpu\n");
11144 goto free_emulate_ctxt;
11145 }
11146
11147 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
11148 vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
11149
11150 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
11151
11152 kvm_async_pf_hash_reset(vcpu);
11153 kvm_pmu_init(vcpu);
11154
11155 vcpu->arch.pending_external_vector = -1;
11156 vcpu->arch.preempted_in_kernel = false;
11157
11158#if IS_ENABLED(CONFIG_HYPERV)
11159 vcpu->arch.hv_root_tdp = INVALID_PAGE;
11160#endif
11161
11162 r = static_call(kvm_x86_vcpu_create)(vcpu);
11163 if (r)
11164 goto free_guest_fpu;
11165
11166 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
11167 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
11168 kvm_vcpu_mtrr_init(vcpu);
11169 vcpu_load(vcpu);
11170 kvm_set_tsc_khz(vcpu, max_tsc_khz);
11171 kvm_vcpu_reset(vcpu, false);
11172 kvm_init_mmu(vcpu);
11173 vcpu_put(vcpu);
11174 return 0;
11175
11176free_guest_fpu:
11177 fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
11178free_emulate_ctxt:
11179 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
11180free_wbinvd_dirty_mask:
11181 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
11182fail_free_mce_banks:
11183 kfree(vcpu->arch.mce_banks);
11184fail_free_pio_data:
11185 free_page((unsigned long)vcpu->arch.pio_data);
11186fail_free_lapic:
11187 kvm_free_lapic(vcpu);
11188fail_mmu_destroy:
11189 kvm_mmu_destroy(vcpu);
11190 return r;
11191}
11192
11193void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
11194{
11195 struct kvm *kvm = vcpu->kvm;
11196
11197 if (mutex_lock_killable(&vcpu->mutex))
11198 return;
11199 vcpu_load(vcpu);
11200 kvm_synchronize_tsc(vcpu, 0);
11201 vcpu_put(vcpu);
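
	/* poll control enabled by default */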
11204 vcpu->arch.msr_kvm_poll_control = 1;
11205
11206 mutex_unlock(&vcpu->mutex);
11207
11208 if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
11209 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
11210 KVMCLOCK_SYNC_PERIOD);
11211}
11212
11213void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
11214{
11215 int idx;
11216
11217 kvmclock_reset(vcpu);
11218
11219 static_call(kvm_x86_vcpu_free)(vcpu);
11220
11221 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
11222 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
11223 fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
11224
11225 kvm_hv_vcpu_uninit(vcpu);
11226 kvm_pmu_destroy(vcpu);
11227 kfree(vcpu->arch.mce_banks);
11228 kvm_free_lapic(vcpu);
11229 idx = srcu_read_lock(&vcpu->kvm->srcu);
11230 kvm_mmu_destroy(vcpu);
11231 srcu_read_unlock(&vcpu->kvm->srcu, idx);
11232 free_page((unsigned long)vcpu->arch.pio_data);
11233 kvfree(vcpu->arch.cpuid_entries);
11234 if (!lapic_in_kernel(vcpu))
11235 static_branch_dec(&kvm_has_noapic_vcpu);
11236}
11237
11238void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
11239{
11240 struct kvm_cpuid_entry2 *cpuid_0x1;
11241 unsigned long old_cr0 = kvm_read_cr0(vcpu);
11242 unsigned long new_cr0;
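
	/*
	 * Several of the "set" flows, e.g. ->set_cr0(), read other registers
	 * to handle side effects.  RESET emulation hits those flows and relies
	 * on emulated/virtualized registers being zeroed at vCPU creation, so
	 * use CR0/CR3/CR4 as sentinels to detect missing or improper
	 * initialization.
	 */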
11251 WARN_ON_ONCE(!init_event &&
11252 (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu)));
11253
11254 kvm_lapic_reset(vcpu, init_event);
11255
11256 vcpu->arch.hflags = 0;
11257
11258 vcpu->arch.smi_pending = 0;
11259 vcpu->arch.smi_count = 0;
11260 atomic_set(&vcpu->arch.nmi_queued, 0);
11261 vcpu->arch.nmi_pending = 0;
11262 vcpu->arch.nmi_injected = false;
11263 kvm_clear_interrupt_queue(vcpu);
11264 kvm_clear_exception_queue(vcpu);
11265
11266 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
11267 kvm_update_dr0123(vcpu);
11268 vcpu->arch.dr6 = DR6_ACTIVE_LOW;
11269 vcpu->arch.dr7 = DR7_FIXED_1;
11270 kvm_update_dr7(vcpu);
11271
11272 vcpu->arch.cr2 = 0;
11273
11274 kvm_make_request(KVM_REQ_EVENT, vcpu);
11275 vcpu->arch.apf.msr_en_val = 0;
11276 vcpu->arch.apf.msr_int_val = 0;
11277 vcpu->arch.st.msr_val = 0;
11278
11279 kvmclock_reset(vcpu);
11280
11281 kvm_clear_async_pf_completion_queue(vcpu);
11282 kvm_async_pf_hash_reset(vcpu);
11283 vcpu->arch.apf.halted = false;
11284
11285 if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
11286 struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
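
		/*
		 * On INIT the guest FPU may currently be loaded in hardware;
		 * drop it so the BNDREGS/BNDCSR components can be cleared in
		 * the saved fpstate, then reload it afterwards.
		 */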
11292 if (init_event)
11293 kvm_put_guest_fpu(vcpu);
11294
11295 fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
11296 fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
11297
11298 if (init_event)
11299 kvm_load_guest_fpu(vcpu);
11300 }
11301
11302 if (!init_event) {
11303 kvm_pmu_reset(vcpu);
11304 vcpu->arch.smbase = 0x30000;
11305
11306 vcpu->arch.msr_misc_features_enables = 0;
11307
11308 __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
11309 __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
11310 }
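
	/* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */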
11313 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
11314 kvm_register_mark_dirty(vcpu, VCPU_REGS_RSP);
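
	/*
	 * RDX holds CPUID.0x1.EAX (family/model/stepping) after RESET/INIT.
	 * Fall back to KVM's historical default of 0x600 (P6/Athlon class)
	 * if the vCPU has no CPUID.0x1 entry.
	 */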
11323 cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
11324 kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
11325
11326 static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
11327
11328 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
11329 kvm_rip_write(vcpu, 0xfff0);
11330
11331 vcpu->arch.cr3 = 0;
11332 kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
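
	/* CR0.CD and CR0.NW are set on RESET and preserved across INIT. */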
11339 new_cr0 = X86_CR0_ET;
11340 if (init_event)
11341 new_cr0 |= (old_cr0 & (X86_CR0_NW | X86_CR0_CD));
11342 else
11343 new_cr0 |= X86_CR0_NW | X86_CR0_CD;
11344
11345 static_call(kvm_x86_set_cr0)(vcpu, new_cr0);
11346 static_call(kvm_x86_set_cr4)(vcpu, 0);
11347 static_call(kvm_x86_set_efer)(vcpu, 0);
11348 static_call(kvm_x86_update_exception_bitmap)(vcpu);
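
	/*
	 * Reset the MMU context if paging was enabled prior to INIT (which is
	 * implied if CR0.PG=1, as CR0 is zero prior to RESET).  CR0.PG, CR4
	 * and EFER were all cleared just above, so the paging mode can only
	 * have changed if paging was previously enabled.
	 */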
11358 if (old_cr0 & X86_CR0_PG)
11359 kvm_mmu_reset_context(vcpu);
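
	/*
	 * Intel's SDM states that all TLB entries are flushed on INIT, while
	 * AMD's APM is less explicit.  Flush the guest TLB regardless of
	 * vendor; a superfluous flush on INIT is benign.
	 */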
11370 if (init_event)
11371 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
11372}
11373EXPORT_SYMBOL_GPL(kvm_vcpu_reset);
11374
11375void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
11376{
11377 struct kvm_segment cs;
11378
11379 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
11380 cs.selector = vector << 8;
11381 cs.base = vector << 12;
11382 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
11383 kvm_rip_write(vcpu, 0);
11384}
11385EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
11386
11387int kvm_arch_hardware_enable(void)
11388{
11389 struct kvm *kvm;
11390 struct kvm_vcpu *vcpu;
11391 unsigned long i;
11392 int ret;
11393 u64 local_tsc;
11394 u64 max_tsc = 0;
11395 bool stable, backwards_tsc = false;
11396
11397 kvm_user_return_msr_cpu_online();
11398 ret = static_call(kvm_x86_hardware_enable)();
11399 if (ret != 0)
11400 return ret;
11401
11402 local_tsc = rdtsc();
11403 stable = !kvm_check_tsc_unstable();
11404 list_for_each_entry(kvm, &vm_list, vm_list) {
11405 kvm_for_each_vcpu(i, vcpu, kvm) {
11406 if (!stable && vcpu->cpu == smp_processor_id())
11407 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
11408 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
11409 backwards_tsc = true;
11410 if (vcpu->arch.last_host_tsc > max_tsc)
11411 max_tsc = vcpu->arch.last_host_tsc;
11412 }
11413 }
11414 }
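
	/*
	 * Even "stable" TSCs can appear to go backwards across a host
	 * suspend/resume, because some platforms reset the TSC on resume.  If
	 * any vCPU's last observed host TSC is ahead of the current TSC,
	 * compensate by pushing every vCPU's TSC offset forward by the largest
	 * observed delta so the guest never sees its TSC move backwards.
	 */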
11454 if (backwards_tsc) {
11455 u64 delta_cyc = max_tsc - local_tsc;
11456 list_for_each_entry(kvm, &vm_list, vm_list) {
11457 kvm->arch.backwards_tsc_observed = true;
11458 kvm_for_each_vcpu(i, vcpu, kvm) {
11459 vcpu->arch.tsc_offset_adjustment += delta_cyc;
11460 vcpu->arch.last_host_tsc = local_tsc;
11461 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
11462 }
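
			/*
			 * Clear the cached TSC write state so that stale
			 * values are not used for TSC offset matching across
			 * the backwards jump.
			 */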
11470 kvm->arch.last_tsc_nsec = 0;
11471 kvm->arch.last_tsc_write = 0;
11472 }
11473
11474 }
11475 return 0;
11476}
11477
11478void kvm_arch_hardware_disable(void)
11479{
11480 static_call(kvm_x86_hardware_disable)();
11481 drop_user_return_notifiers();
11482}
11483
11484int kvm_arch_hardware_setup(void *opaque)
11485{
11486 struct kvm_x86_init_ops *ops = opaque;
11487 int r;
11488
11489 rdmsrl_safe(MSR_EFER, &host_efer);
11490
11491 if (boot_cpu_has(X86_FEATURE_XSAVES))
11492 rdmsrl(MSR_IA32_XSS, host_xss);
11493
11494 r = ops->hardware_setup();
11495 if (r != 0)
11496 return r;
11497
11498 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
11499 kvm_ops_static_call_update();
11500
11501 kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
11502
11503 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
11504 supported_xss = 0;
11505
11506#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
11507 cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
11508#undef __kvm_cpu_cap_has
11509
11510 if (kvm_has_tsc_control) {
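		/*
		 * Cap the advertised maximum guest TSC frequency at the
		 * largest value that both fits in a signed 32-bit integer and
		 * is reachable by applying the maximum hardware scaling ratio
		 * to the host TSC frequency.
		 */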
11517 u64 max = min(0x7fffffffULL,
11518 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
11519 kvm_max_guest_tsc_khz = max;
11520
11521 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
11522 }
11523
11524 kvm_init_msr_list();
11525 return 0;
11526}
11527
11528void kvm_arch_hardware_unsetup(void)
11529{
11530 kvm_unregister_perf_callbacks();
11531
11532 static_call(kvm_x86_hardware_unsetup)();
11533}
11534
11535int kvm_arch_check_processor_compat(void *opaque)
11536{
11537 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
11538 struct kvm_x86_init_ops *ops = opaque;
11539
11540 WARN_ON(!irqs_disabled());
11541
11542 if (__cr4_reserved_bits(cpu_has, c) !=
11543 __cr4_reserved_bits(cpu_has, &boot_cpu_data))
11544 return -EIO;
11545
11546 return ops->check_processor_compatibility();
11547}
11548
11549bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
11550{
11551 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
11552}
11553EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
11554
11555bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
11556{
11557 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
11558}
11559
11560__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
11561EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
11562
11563void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
11564{
11565 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
11566
11567 vcpu->arch.l1tf_flush_l1d = true;
11568 if (pmu->version && unlikely(pmu->event_count)) {
11569 pmu->need_cleanup = true;
11570 kvm_make_request(KVM_REQ_PMU, vcpu);
11571 }
11572 static_call(kvm_x86_sched_in)(vcpu, cpu);
11573}
11574
11575void kvm_arch_free_vm(struct kvm *kvm)
11576{
11577 kfree(to_kvm_hv(kvm)->hv_pa_pg);
11578 __kvm_arch_free_vm(kvm);
11579}
11580
11581
11582int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
11583{
11584 int ret;
11585 unsigned long flags;
11586
11587 if (type)
11588 return -EINVAL;
11589
11590 ret = kvm_page_track_init(kvm);
11591 if (ret)
11592 return ret;
11593
11594 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
11595 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
11596 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
11597 INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
11598 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
11599 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
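
	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */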
11602 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
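	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */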
11604 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
11605 &kvm->arch.irq_sources_bitmap);
11606
11607 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
11608 mutex_init(&kvm->arch.apic_map_lock);
11609 seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock);
11610 kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
11611
11612 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
11613 pvclock_update_vm_gtod_copy(kvm);
11614 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
11615
11616 kvm->arch.guest_can_read_msr_platform_info = true;
11617
11618#if IS_ENABLED(CONFIG_HYPERV)
11619 spin_lock_init(&kvm->arch.hv_root_tdp_lock);
11620 kvm->arch.hv_root_tdp = INVALID_PAGE;
11621#endif
11622
11623 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
11624 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
11625
11626 kvm_apicv_init(kvm);
11627 kvm_hv_init_vm(kvm);
11628 kvm_mmu_init_vm(kvm);
11629 kvm_xen_init_vm(kvm);
11630
11631 return static_call(kvm_x86_vm_init)(kvm);
11632}
11633
11634int kvm_arch_post_init_vm(struct kvm *kvm)
11635{
11636 return kvm_mmu_post_init_vm(kvm);
11637}
11638
11639static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
11640{
11641 vcpu_load(vcpu);
11642 kvm_mmu_unload(vcpu);
11643 vcpu_put(vcpu);
11644}
11645
11646static void kvm_free_vcpus(struct kvm *kvm)
11647{
11648 unsigned long i;
11649 struct kvm_vcpu *vcpu;
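
	/*
	 * Drop outstanding async #PF work and unload the vCPU MMUs before the
	 * vCPUs themselves are destroyed.
	 */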
11654 kvm_for_each_vcpu(i, vcpu, kvm) {
11655 kvm_clear_async_pf_completion_queue(vcpu);
11656 kvm_unload_vcpu_mmu(vcpu);
11657 }
11658
11659 kvm_destroy_vcpus(kvm);
11660}
11661
11662void kvm_arch_sync_events(struct kvm *kvm)
11663{
11664 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
11665 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
11666 kvm_free_pit(kvm);
11667}
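
/*
 * __x86_set_memory_region: set up or tear down a KVM-internal memory slot
 *
 * @kvm:  the VM
 * @id:   the slot id
 * @gpa:  guest physical address of the slot (ignored when @size is 0)
 * @size: size of the slot; non-zero installs the slot backed by a fresh
 *        anonymous mapping, zero removes an existing slot and unmaps it
 *
 * Returns the userspace address backing the slot on success (a stale,
 * already-unmapped address when removing a slot), or an ERR_PTR()-encoded
 * error; callers must check the result with IS_ERR().
 */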
11691void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
11692 u32 size)
11693{
11694 int i, r;
11695 unsigned long hva, old_npages;
11696 struct kvm_memslots *slots = kvm_memslots(kvm);
11697 struct kvm_memory_slot *slot;
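
	/* Called with kvm->slots_lock held. */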
11700 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
11701 return ERR_PTR_USR(-EINVAL);
11702
11703 slot = id_to_memslot(slots, id);
11704 if (size) {
11705 if (slot && slot->npages)
11706 return ERR_PTR_USR(-EEXIST);
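
		/*
		 * MAP_SHARED to prevent internal slot pages from being moved
		 * by fork()/COW.
		 */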
11712 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
11713 MAP_SHARED | MAP_ANONYMOUS, 0);
11714 if (IS_ERR((void *)hva))
11715 return (void __user *)hva;
11716 } else {
11717 if (!slot || !slot->npages)
11718 return NULL;
11719
11720 old_npages = slot->npages;
11721 hva = slot->userspace_addr;
11722 }
11723
11724 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
11725 struct kvm_userspace_memory_region m;
11726
11727 m.slot = id | (i << 16);
11728 m.flags = 0;
11729 m.guest_phys_addr = gpa;
11730 m.userspace_addr = hva;
11731 m.memory_size = size;
11732 r = __kvm_set_memory_region(kvm, &m);
11733 if (r < 0)
11734 return ERR_PTR_USR(r);
11735 }
11736
11737 if (!size)
11738 vm_munmap(hva, old_npages * PAGE_SIZE);
11739
11740 return (void __user *)hva;
11741}
11742EXPORT_SYMBOL_GPL(__x86_set_memory_region);
11743
11744void kvm_arch_pre_destroy_vm(struct kvm *kvm)
11745{
11746 kvm_mmu_pre_destroy_vm(kvm);
11747}
11748
11749void kvm_arch_destroy_vm(struct kvm *kvm)
11750{
11751 if (current->mm == kvm->mm) {
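		/*
		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
		 * or fd copying.
		 */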
11757 mutex_lock(&kvm->slots_lock);
11758 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
11759 0, 0);
11760 __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
11761 0, 0);
11762 __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
11763 mutex_unlock(&kvm->slots_lock);
11764 }
11765 static_call_cond(kvm_x86_vm_destroy)(kvm);
11766 kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
11767 kvm_pic_destroy(kvm);
11768 kvm_ioapic_destroy(kvm);
11769 kvm_free_vcpus(kvm);
11770 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
11771 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
11772 kvm_mmu_uninit_vm(kvm);
11773 kvm_page_track_cleanup(kvm);
11774 kvm_xen_destroy_vm(kvm);
11775 kvm_hv_destroy_vm(kvm);
11776}
11777
11778static void memslot_rmap_free(struct kvm_memory_slot *slot)
11779{
11780 int i;
11781
11782 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11783 kvfree(slot->arch.rmap[i]);
11784 slot->arch.rmap[i] = NULL;
11785 }
11786}
11787
11788void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
11789{
11790 int i;
11791
11792 memslot_rmap_free(slot);
11793
11794 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11795 kvfree(slot->arch.lpage_info[i - 1]);
11796 slot->arch.lpage_info[i - 1] = NULL;
11797 }
11798
11799 kvm_page_track_free_memslot(slot);
11800}
11801
11802int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
11803{
11804 const int sz = sizeof(*slot->arch.rmap[0]);
11805 int i;
11806
11807 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
11808 int level = i + 1;
11809 int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
11810
11811 if (slot->arch.rmap[i])
11812 continue;
11813
11814 slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
11815 if (!slot->arch.rmap[i]) {
11816 memslot_rmap_free(slot);
11817 return -ENOMEM;
11818 }
11819 }
11820
11821 return 0;
11822}
11823
11824static int kvm_alloc_memslot_metadata(struct kvm *kvm,
11825 struct kvm_memory_slot *slot)
11826{
11827 unsigned long npages = slot->npages;
11828 int i, r;
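
	/*
	 * Clear out the previous array pointers for the memslot.  When the
	 * memslot is moved, the old slot's metadata can no longer be trusted.
	 */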
11835 memset(&slot->arch, 0, sizeof(slot->arch));
11836
11837 if (kvm_memslots_have_rmaps(kvm)) {
11838 r = memslot_rmap_alloc(slot, npages);
11839 if (r)
11840 return r;
11841 }
11842
11843 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11844 struct kvm_lpage_info *linfo;
11845 unsigned long ugfn;
11846 int lpages;
11847 int level = i + 1;
11848
11849 lpages = __kvm_mmu_slot_lpages(slot, npages, level);
11850
11851 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
11852 if (!linfo)
11853 goto out_free;
11854
11855 slot->arch.lpage_info[i - 1] = linfo;
11856
11857 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
11858 linfo[0].disallow_lpage = 1;
11859 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
11860 linfo[lpages - 1].disallow_lpage = 1;
11861 ugfn = slot->userspace_addr >> PAGE_SHIFT;
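		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, disable large page support for this slot.
		 */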
11866 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
11867 unsigned long j;
11868
11869 for (j = 0; j < lpages; ++j)
11870 linfo[j].disallow_lpage = 1;
11871 }
11872 }
11873
11874 if (kvm_page_track_create_memslot(kvm, slot, npages))
11875 goto out_free;
11876
11877 return 0;
11878
11879out_free:
11880 memslot_rmap_free(slot);
11881
11882 for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
11883 kvfree(slot->arch.lpage_info[i - 1]);
11884 slot->arch.lpage_info[i - 1] = NULL;
11885 }
11886 return -ENOMEM;
11887}
11888
11889void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
11890{
11891 struct kvm_vcpu *vcpu;
11892 unsigned long i;
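
	/*
	 * memslots->generation has been incremented; the MMIO spte generation
	 * may have wrapped, so invalidate any cached MMIO sptes.
	 */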
11898 kvm_mmu_invalidate_mmio_sptes(kvm, gen);
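
	/* Force re-initialization of the per-vCPU steal_time cache. */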
11901 kvm_for_each_vcpu(i, vcpu, kvm)
11902 kvm_vcpu_kick(vcpu);
11903}
11904
11905int kvm_arch_prepare_memory_region(struct kvm *kvm,
11906 const struct kvm_memory_slot *old,
11907 struct kvm_memory_slot *new,
11908 enum kvm_mr_change change)
11909{
11910 if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
11911 return kvm_alloc_memslot_metadata(kvm, new);
11912
11913 if (change == KVM_MR_FLAGS_ONLY)
11914 memcpy(&new->arch, &old->arch, sizeof(old->arch));
11915 else if (WARN_ON_ONCE(change != KVM_MR_DELETE))
11916 return -EIO;
11917
11918 return 0;
11919}
11920
11921
11922static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
11923{
11924 struct kvm_arch *ka = &kvm->arch;
11925
11926 if (!kvm_x86_ops.cpu_dirty_log_size)
11927 return;
11928
11929 if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
11930 (!enable && --ka->cpu_dirty_logging_count == 0))
11931 kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
11932
11933 WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
11934}
11935
11936static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
11937 struct kvm_memory_slot *old,
11938 const struct kvm_memory_slot *new,
11939 enum kvm_mr_change change)
11940{
11941 u32 old_flags = old ? old->flags : 0;
11942 u32 new_flags = new ? new->flags : 0;
11943 bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
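
	/*
	 * Update CPU dirty logging if dirty logging is being toggled.  This
	 * applies to all operations.
	 */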
11949 if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)
11950 kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
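
	/*
	 * Nothing more to do for RO slots (which can't be dirtied and can't
	 * be made writable) or for CREATE/MOVE/DELETE of a slot: new and
	 * moved slots have no shadow pages yet, and deleted or moved old
	 * slots are cleaned up by kvm_arch_flush_shadow_memslot().
	 */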
11967 if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY))
11968 return;
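
	/*
	 * READONLY and non-flags changes were filtered out above, and the
	 * only other flag is LOG_DIRTY_PAGES, i.e. something is wrong if
	 * dirty logging isn't being toggled on or off.
	 */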
11975 if (WARN_ON_ONCE(!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)))
11976 return;
11977
11978 if (!log_dirty_pages) {
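		/*
		 * Dirty logging was just disabled (e.g. a cancelled live
		 * migration).  Zap the small SPTEs that were created for
		 * write tracking so that huge mappings can be recovered.
		 */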
11992 kvm_mmu_zap_collapsible_sptes(kvm, new);
11993 } else {
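		/*
		 * Initially-all-set does not require write protecting any
		 * page, because they're all assumed to be dirty already.
		 */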
11998 if (kvm_dirty_log_manual_protect_and_init_set(kvm))
11999 return;
12000
12001 if (kvm_x86_ops.cpu_dirty_log_size) {
12002 kvm_mmu_slot_leaf_clear_dirty(kvm, new);
12003 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
12004 } else {
12005 kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
12006 }
12007 }
12008}
12009
12010void kvm_arch_commit_memory_region(struct kvm *kvm,
12011 struct kvm_memory_slot *old,
12012 const struct kvm_memory_slot *new,
12013 enum kvm_mr_change change)
12014{
12015 if (!kvm->arch.n_requested_mmu_pages &&
12016 (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
12017 unsigned long nr_mmu_pages;
12018
12019 nr_mmu_pages = kvm->nr_memslot_pages / KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO;
12020 nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
12021 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
12022 }
12023
12024 kvm_mmu_slot_apply_flags(kvm, old, new, change);
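
	/* Free the arrays associated with the old memslot. */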
12027 if (change == KVM_MR_MOVE)
12028 kvm_arch_free_memslot(kvm, old);
12029}
12030
12031void kvm_arch_flush_shadow_all(struct kvm *kvm)
12032{
12033 kvm_mmu_zap_all(kvm);
12034}
12035
12036void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
12037 struct kvm_memory_slot *slot)
12038{
12039 kvm_page_track_flush_slot(kvm, slot);
12040}
12041
12042static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
12043{
12044 return (is_guest_mode(vcpu) &&
12045 kvm_x86_ops.guest_apic_has_interrupt &&
12046 static_call(kvm_x86_guest_apic_has_interrupt)(vcpu));
12047}
12048
12049static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
12050{
12051 if (!list_empty_careful(&vcpu->async_pf.done))
12052 return true;
12053
12054 if (kvm_apic_has_events(vcpu))
12055 return true;
12056
12057 if (vcpu->arch.pv.pv_unhalted)
12058 return true;
12059
12060 if (vcpu->arch.exception.pending)
12061 return true;
12062
12063 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
12064 (vcpu->arch.nmi_pending &&
12065 static_call(kvm_x86_nmi_allowed)(vcpu, false)))
12066 return true;
12067
12068 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
12069 (vcpu->arch.smi_pending &&
12070 static_call(kvm_x86_smi_allowed)(vcpu, false)))
12071 return true;
12072
12073 if (kvm_arch_interrupt_allowed(vcpu) &&
12074 (kvm_cpu_has_interrupt(vcpu) ||
12075 kvm_guest_apic_has_interrupt(vcpu)))
12076 return true;
12077
12078 if (kvm_hv_has_stimer_pending(vcpu))
12079 return true;
12080
12081 if (is_guest_mode(vcpu) &&
12082 kvm_x86_ops.nested_ops->hv_timer_pending &&
12083 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
12084 return true;
12085
12086 return false;
12087}
12088
12089int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
12090{
12091 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
12092}
12093
12094bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
12095{
12096 if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
12097 return true;
12098
12099 return false;
12100}
12101
12102bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
12103{
12104 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
12105 return true;
12106
12107 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
12108 kvm_test_request(KVM_REQ_SMI, vcpu) ||
12109 kvm_test_request(KVM_REQ_EVENT, vcpu))
12110 return true;
12111
12112 return kvm_arch_dy_has_pending_interrupt(vcpu);
12113}
12114
12115bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
12116{
12117 if (vcpu->arch.guest_state_protected)
12118 return true;
12119
12120 return vcpu->arch.preempted_in_kernel;
12121}
12122
12123unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
12124{
12125 return kvm_rip_read(vcpu);
12126}
12127
12128int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
12129{
12130 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
12131}
12132
12133int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
12134{
12135 return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
12136}
12137
12138unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
12139{
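	/* Can't read the RIP when guest state is protected, just return 0 */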
12141 if (vcpu->arch.guest_state_protected)
12142 return 0;
12143
12144 if (is_64_bit_mode(vcpu))
12145 return kvm_rip_read(vcpu);
12146 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
12147 kvm_rip_read(vcpu));
12148}
12149EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
12150
12151bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
12152{
12153 return kvm_get_linear_rip(vcpu) == linear_rip;
12154}
12155EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
12156
12157unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
12158{
12159 unsigned long rflags;
12160
12161 rflags = static_call(kvm_x86_get_rflags)(vcpu);
12162 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
12163 rflags &= ~X86_EFLAGS_TF;
12164 return rflags;
12165}
12166EXPORT_SYMBOL_GPL(kvm_get_rflags);
12167
12168static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
12169{
12170 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
12171 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
12172 rflags |= X86_EFLAGS_TF;
12173 static_call(kvm_x86_set_rflags)(vcpu, rflags);
12174}
12175
12176void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
12177{
12178 __kvm_set_rflags(vcpu, rflags);
12179 kvm_make_request(KVM_REQ_EVENT, vcpu);
12180}
12181EXPORT_SYMBOL_GPL(kvm_set_rflags);
12182
12183void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
12184{
12185 int r;
12186
12187 if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
12188 work->wakeup_all)
12189 return;
12190
12191 r = kvm_mmu_reload(vcpu);
12192 if (unlikely(r))
12193 return;
12194
12195 if (!vcpu->arch.mmu->direct_map &&
12196 work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
12197 return;
12198
12199 kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
12200}
12201
12202static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
12203{
12204 BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
12205
12206 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
12207}
12208
12209static inline u32 kvm_async_pf_next_probe(u32 key)
12210{
12211 return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
12212}
12213
12214static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12215{
12216 u32 key = kvm_async_pf_hash_fn(gfn);
12217
12218 while (vcpu->arch.apf.gfns[key] != ~0)
12219 key = kvm_async_pf_next_probe(key);
12220
12221 vcpu->arch.apf.gfns[key] = gfn;
12222}
12223
12224static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
12225{
12226 int i;
12227 u32 key = kvm_async_pf_hash_fn(gfn);
12228
12229 for (i = 0; i < ASYNC_PF_PER_VCPU &&
12230 (vcpu->arch.apf.gfns[key] != gfn &&
12231 vcpu->arch.apf.gfns[key] != ~0); i++)
12232 key = kvm_async_pf_next_probe(key);
12233
12234 return key;
12235}
12236
12237bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12238{
12239 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
12240}
12241
12242static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
12243{
12244 u32 i, j, k;
12245
12246 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
12247
12248 if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
12249 return;
12250
12251 while (true) {
12252 vcpu->arch.apf.gfns[i] = ~0;
12253 do {
12254 j = kvm_async_pf_next_probe(j);
12255 if (vcpu->arch.apf.gfns[j] == ~0)
12256 return;
12257 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
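			/*
			 * Keep scanning while the entry at j hashes to a slot
			 * k that lies cyclically in ]i, j]; such an entry
			 * cannot be moved into the vacated slot i:
			 *   |    i.k.j |
			 *   |....j i.k.| or |.k..j i...|
			 */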
12263 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
12264 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
12265 i = j;
12266 }
12267}
12268
12269static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
12270{
12271 u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
12272
12273 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
12274 sizeof(reason));
12275}
12276
12277static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
12278{
12279 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
12280
12281 return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
12282 &token, offset, sizeof(token));
12283}
12284
12285static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
12286{
12287 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
12288 u32 val;
12289
12290 if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
12291 &val, offset, sizeof(val)))
12292 return false;
12293
12294 return !val;
12295}
12296
12297static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
12298{
12299 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
12300 return false;
12301
12302 if (!kvm_pv_async_pf_enabled(vcpu) ||
12303 (vcpu->arch.apf.send_user_only && static_call(kvm_x86_get_cpl)(vcpu) == 0))
12304 return false;
12305
12306 return true;
12307}
12308
12309bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
12310{
12311 if (unlikely(!lapic_in_kernel(vcpu) ||
12312 kvm_event_needs_reinjection(vcpu) ||
12313 vcpu->arch.exception.pending))
12314 return false;
12315
12316 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
12317 return false;
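
	/*
	 * If interrupts are off we cannot even use an artificial
	 * halt state.
	 */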
12323 return kvm_arch_interrupt_allowed(vcpu);
12324}
12325
12326bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
12327 struct kvm_async_pf *work)
12328{
12329 struct x86_exception fault;
12330
12331 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
12332 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
12333
12334 if (kvm_can_deliver_async_pf(vcpu) &&
12335 !apf_put_user_notpresent(vcpu)) {
12336 fault.vector = PF_VECTOR;
12337 fault.error_code_valid = true;
12338 fault.error_code = 0;
12339 fault.nested_page_fault = false;
12340 fault.address = work->arch.token;
12341 fault.async_page_fault = true;
12342 kvm_inject_page_fault(vcpu, &fault);
12343 return true;
12344 } else {
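		/*
		 * A paravirtualized async page fault can't be delivered right
		 * now; put the vCPU into an artificial halt state instead.
		 * Interrupts can still be delivered, and the faulting
		 * instruction will be retried once the page is ready.
		 */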
12353 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
12354 return false;
12355 }
12356}
12357
12358void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
12359 struct kvm_async_pf *work)
12360{
12361 struct kvm_lapic_irq irq = {
12362 .delivery_mode = APIC_DM_FIXED,
12363 .vector = vcpu->arch.apf.vec
12364 };
12365
12366 if (work->wakeup_all)
12367 work->arch.token = ~0;
12368 else
12369 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
12370 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
12371
12372 if ((work->wakeup_all || work->notpresent_injected) &&
12373 kvm_pv_async_pf_enabled(vcpu) &&
12374 !apf_put_user_ready(vcpu, work->arch.token)) {
12375 vcpu->arch.apf.pageready_pending = true;
12376 kvm_apic_set_irq(vcpu, &irq, NULL);
12377 }
12378
12379 vcpu->arch.apf.halted = false;
12380 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
12381}
12382
12383void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
12384{
12385 kvm_make_request(KVM_REQ_APF_READY, vcpu);
12386 if (!vcpu->arch.apf.pageready_pending)
12387 kvm_vcpu_kick(vcpu);
12388}
12389
12390bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
12391{
12392 if (!kvm_pv_async_pf_enabled(vcpu))
12393 return true;
12394 else
12395 return kvm_lapic_enabled(vcpu) && apf_pageready_slot_free(vcpu);
12396}
12397
12398void kvm_arch_start_assignment(struct kvm *kvm)
12399{
12400 if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
12401 static_call_cond(kvm_x86_start_assignment)(kvm);
12402}
12403EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
12404
12405void kvm_arch_end_assignment(struct kvm *kvm)
12406{
12407 atomic_dec(&kvm->arch.assigned_device_count);
12408}
12409EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
12410
12411bool kvm_arch_has_assigned_device(struct kvm *kvm)
12412{
12413 return atomic_read(&kvm->arch.assigned_device_count);
12414}
12415EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
12416
12417void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
12418{
12419 atomic_inc(&kvm->arch.noncoherent_dma_count);
12420}
12421EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
12422
12423void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
12424{
12425 atomic_dec(&kvm->arch.noncoherent_dma_count);
12426}
12427EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
12428
12429bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
12430{
12431 return atomic_read(&kvm->arch.noncoherent_dma_count);
12432}
12433EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
12434
12435bool kvm_arch_has_irq_bypass(void)
12436{
12437 return true;
12438}
12439
12440int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
12441 struct irq_bypass_producer *prod)
12442{
12443 struct kvm_kernel_irqfd *irqfd =
12444 container_of(cons, struct kvm_kernel_irqfd, consumer);
12445 int ret;
12446
12447 irqfd->producer = prod;
12448 kvm_arch_start_assignment(irqfd->kvm);
12449 ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm,
12450 prod->irq, irqfd->gsi, 1);
12451
12452 if (ret)
12453 kvm_arch_end_assignment(irqfd->kvm);
12454
12455 return ret;
12456}
12457
12458void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
12459 struct irq_bypass_producer *prod)
12460{
12461 int ret;
12462 struct kvm_kernel_irqfd *irqfd =
12463 container_of(cons, struct kvm_kernel_irqfd, consumer);
12464
12465 WARN_ON(irqfd->producer != prod);
12466 irqfd->producer = NULL;
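
	/*
	 * When the producer is unregistered, revert to remapped (non-posted)
	 * interrupt delivery so the existing path is used while the IRQ is
	 * masked/disabled or has no consumer.
	 */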
12474 ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0);
12475 if (ret)
		printk(KERN_INFO "irq bypass consumer (token %p) unregistration failed: %d\n",
		       irqfd->consumer.token, ret);
12478
12479 kvm_arch_end_assignment(irqfd->kvm);
12480}
12481
12482int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
12483 uint32_t guest_irq, bool set)
12484{
12485 return static_call(kvm_x86_update_pi_irte)(kvm, host_irq, guest_irq, set);
12486}
12487
12488bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
12489 struct kvm_kernel_irq_routing_entry *new)
12490{
12491 if (new->type != KVM_IRQ_ROUTING_MSI)
12492 return true;
12493
12494 return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
12495}
12496
12497bool kvm_vector_hashing_enabled(void)
12498{
12499 return vector_hashing;
12500}
12501
12502bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
12503{
12504 return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
12505}
12506EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
12507
12508
12509int kvm_spec_ctrl_test_value(u64 value)
12510{
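	/*
	 * Check whether the given value can be written to IA32_SPEC_CTRL on
	 * the host: returns 1 if the read or write faults, 0 on success with
	 * the original value left in place.
	 */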
12516 u64 saved_value;
12517 unsigned long flags;
12518 int ret = 0;
12519
12520 local_irq_save(flags);
12521
12522 if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
12523 ret = 1;
12524 else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
12525 ret = 1;
12526 else
12527 wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
12528
12529 local_irq_restore(flags);
12530
12531 return ret;
12532}
12533EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
12534
12535void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
12536{
12537 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
12538 struct x86_exception fault;
12539 u32 access = error_code &
12540 (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
12541
12542 if (!(error_code & PFERR_PRESENT_MASK) ||
12543 mmu->gva_to_gpa(vcpu, mmu, gva, access, &fault) != UNMAPPED_GVA) {
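		/*
		 * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
		 * tables probably do not match the TLB.  Just proceed
		 * with the error code that the processor gave.
		 */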
12549 fault.vector = PF_VECTOR;
12550 fault.error_code_valid = true;
12551 fault.error_code = error_code;
12552 fault.nested_page_fault = false;
12553 fault.address = gva;
12554 }
12555 vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
12556}
12557EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
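
/*
 * Handles kvm_read/write_guest_virt*() result and either injects #PF or
 * returns KVM_EXIT_INTERNAL_ERROR for cases not currently handled by KVM.
 * The return value indicates whether an exit to userspace is needed.
 */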
12564int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
12565 struct x86_exception *e)
12566{
12567 if (r == X86EMUL_PROPAGATE_FAULT) {
12568 kvm_inject_emulated_page_fault(vcpu, e);
12569 return 1;
12570 }
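
	/*
	 * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED
	 * while handling a VMX instruction, KVM could have handled the request
	 * by exiting to userspace and performing I/O, but there doesn't seem
	 * to be a real use case for such requests; just return
	 * KVM_EXIT_INTERNAL_ERROR for now.
	 */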
12579 kvm_prepare_emulation_failure_exit(vcpu);
12580
12581 return 0;
12582}
12583EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);
12584
12585int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
12586{
12587 bool pcid_enabled;
12588 struct x86_exception e;
12589 struct {
12590 u64 pcid;
12591 u64 gla;
12592 } operand;
12593 int r;
12594
12595 r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
12596 if (r != X86EMUL_CONTINUE)
12597 return kvm_handle_memory_failure(vcpu, r, &e);
12598
12599 if (operand.pcid >> 12 != 0) {
12600 kvm_inject_gp(vcpu, 0);
12601 return 1;
12602 }
12603
12604 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
12605
12606 switch (type) {
12607 case INVPCID_TYPE_INDIV_ADDR:
12608 if ((!pcid_enabled && (operand.pcid != 0)) ||
12609 is_noncanonical_address(operand.gla, vcpu)) {
12610 kvm_inject_gp(vcpu, 0);
12611 return 1;
12612 }
12613 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
12614 return kvm_skip_emulated_instruction(vcpu);
12615
12616 case INVPCID_TYPE_SINGLE_CTXT:
12617 if (!pcid_enabled && (operand.pcid != 0)) {
12618 kvm_inject_gp(vcpu, 0);
12619 return 1;
12620 }
12621
12622 kvm_invalidate_pcid(vcpu, operand.pcid);
12623 return kvm_skip_emulated_instruction(vcpu);
12624
12625 case INVPCID_TYPE_ALL_NON_GLOBAL:
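		/*
		 * Currently, KVM doesn't mark global entries in the shadow
		 * page tables, so a non-global flush just degenerates to a
		 * global flush. If needed, this could be optimized later by
		 * tracking global entries in the shadow page tables.
		 */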
12633 fallthrough;
12634 case INVPCID_TYPE_ALL_INCL_GLOBAL:
12635 kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
12636 return kvm_skip_emulated_instruction(vcpu);
12637
12638 default:
12639 kvm_inject_gp(vcpu, 0);
12640 return 1;
12641 }
12642}
12643EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
12644
12645static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
12646{
12647 struct kvm_run *run = vcpu->run;
12648 struct kvm_mmio_fragment *frag;
12649 unsigned int len;
12650
12651 BUG_ON(!vcpu->mmio_needed);
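
	/* Complete the previous fragment: for reads, copy in the data returned by userspace. */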
12654 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
12655 len = min(8u, frag->len);
12656 if (!vcpu->mmio_is_write)
12657 memcpy(frag->data, run->mmio.data, len);
12658
12659 if (frag->len <= 8) {
12660
12661 frag++;
12662 vcpu->mmio_cur_fragment++;
12663 } else {
12664
12665 frag->data += len;
12666 frag->gpa += len;
12667 frag->len -= len;
12668 }
12669
12670 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
12671 vcpu->mmio_needed = 0;
12672
12673
12674
12675 return 1;
12676 }
12677
12678
12679 run->mmio.phys_addr = frag->gpa;
12680 run->mmio.len = min(8u, frag->len);
12681 run->mmio.is_write = vcpu->mmio_is_write;
12682 if (run->mmio.is_write)
12683 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
12684 run->exit_reason = KVM_EXIT_MMIO;
12685
12686 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12687
12688 return 0;
12689}
12690
12691int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
12692 void *data)
12693{
12694 int handled;
12695 struct kvm_mmio_fragment *frag;
12696
12697 if (!data)
12698 return -EINVAL;
12699
12700 handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
12701 if (handled == bytes)
12702 return 1;
12703
12704 bytes -= handled;
12705 gpa += handled;
12706 data += handled;
12707
12708
12709 frag = vcpu->mmio_fragments;
12710 vcpu->mmio_nr_fragments = 1;
12711 frag->len = bytes;
12712 frag->gpa = gpa;
12713 frag->data = data;
12714
12715 vcpu->mmio_needed = 1;
12716 vcpu->mmio_cur_fragment = 0;
12717
12718 vcpu->run->mmio.phys_addr = gpa;
12719 vcpu->run->mmio.len = min(8u, frag->len);
12720 vcpu->run->mmio.is_write = 1;
12721 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
12722 vcpu->run->exit_reason = KVM_EXIT_MMIO;
12723
12724 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12725
12726 return 0;
12727}
12728EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);
12729
12730int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
12731 void *data)
12732{
12733 int handled;
12734 struct kvm_mmio_fragment *frag;
12735
12736 if (!data)
12737 return -EINVAL;
12738
12739 handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
12740 if (handled == bytes)
12741 return 1;
12742
12743 bytes -= handled;
12744 gpa += handled;
12745 data += handled;
12746
12747
12748 frag = vcpu->mmio_fragments;
12749 vcpu->mmio_nr_fragments = 1;
12750 frag->len = bytes;
12751 frag->gpa = gpa;
12752 frag->data = data;
12753
12754 vcpu->mmio_needed = 1;
12755 vcpu->mmio_cur_fragment = 0;
12756
12757 vcpu->run->mmio.phys_addr = gpa;
12758 vcpu->run->mmio.len = min(8u, frag->len);
12759 vcpu->run->mmio.is_write = 0;
12760 vcpu->run->exit_reason = KVM_EXIT_MMIO;
12761
12762 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;
12763
12764 return 0;
12765}
12766EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
12767
12768static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
12769 unsigned int port);
12770
12771static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
12772{
12773 int size = vcpu->arch.pio.size;
12774 int port = vcpu->arch.pio.port;
12775
12776 vcpu->arch.pio.count = 0;
12777 if (vcpu->arch.sev_pio_count)
12778 return kvm_sev_es_outs(vcpu, size, port);
12779 return 1;
12780}
12781
12782static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
12783 unsigned int port)
12784{
12785 for (;;) {
12786 unsigned int count =
12787 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
12788 int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
12789
12790
12791 vcpu->arch.sev_pio_count -= count;
12792 vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
12793 if (!ret)
12794 break;
12795
12796
12797 if (!vcpu->arch.sev_pio_count)
12798 return 1;
12799 }
12800
12801 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
12802 return 0;
12803}
12804
12805static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
12806 unsigned int port);
12807
12808static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
12809{
12810 unsigned count = vcpu->arch.pio.count;
12811 complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
12812 vcpu->arch.sev_pio_count -= count;
12813 vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
12814}
12815
12816static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
12817{
12818 int size = vcpu->arch.pio.size;
12819 int port = vcpu->arch.pio.port;
12820
12821 advance_sev_es_emulated_ins(vcpu);
12822 if (vcpu->arch.sev_pio_count)
12823 return kvm_sev_es_ins(vcpu, size, port);
12824 return 1;
12825}
12826
12827static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
12828 unsigned int port)
12829{
12830 for (;;) {
12831 unsigned int count =
12832 min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
12833 if (!__emulator_pio_in(vcpu, size, port, count))
12834 break;
12835
12836
12837 advance_sev_es_emulated_ins(vcpu);
12838 if (!vcpu->arch.sev_pio_count)
12839 return 1;
12840 }
12841
12842 vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
12843 return 0;
12844}
12845
12846int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
12847 unsigned int port, void *data, unsigned int count,
12848 int in)
12849{
12850 vcpu->arch.sev_pio_data = data;
12851 vcpu->arch.sev_pio_count = count;
12852 return in ? kvm_sev_es_ins(vcpu, size, port)
12853 : kvm_sev_es_outs(vcpu, size, port);
12854}
12855EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
12856
12857EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
12858EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
12859EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
12860EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
12861EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
12862EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
12863EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
12864EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
12865EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
12866EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
12867EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
12868EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
12869EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
12870EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
12871EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
12872EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
12873EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
12874EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
12875EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
12876EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
12877EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
12878EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
12879EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
12880EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq);
12881EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
12882EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
12883EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
12884EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
12885