#include <linux/kvm_host.h>
#include "irq.h"
#include "ioapic.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
#include "hyperv.h"
#include "lapic.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <linux/entry-kvm.h>

#include <trace/events/kvm.h>

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mce.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/tlbflush.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <clocksource/hyperv_timer.h>

#define CREATE_TRACE_POINTS
#include "trace.h"
81
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);

#define emul_to_vcpu(ctxt) \
	((struct kvm_vcpu *)(ctxt)->vcpu)

/*
 * EFER bits a guest may set: SYSCALL enable (SCE) is always allowed since it
 * is emulated by KVM, and long mode (LME/LMA) is additionally allowed on
 * 64-bit hosts.  Everything else starts out reserved.
 */
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;

#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
				    KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
105
106static void update_cr8_intercept(struct kvm_vcpu *vcpu);
107static void process_nmi(struct kvm_vcpu *vcpu);
108static void enter_smm(struct kvm_vcpu *vcpu);
109static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
110static void store_regs(struct kvm_vcpu *vcpu);
111static int sync_regs(struct kvm_vcpu *vcpu);
112
113struct kvm_x86_ops kvm_x86_ops __read_mostly;
114EXPORT_SYMBOL_GPL(kvm_x86_ops);
115
static bool __read_mostly ignore_msrs = false;
117module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
118
119static bool __read_mostly report_ignored_msrs = true;
120module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
121
122unsigned int min_timer_period_us = 200;
123module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
124
125static bool __read_mostly kvmclock_periodic_sync = true;
126module_param(kvmclock_periodic_sync, bool, S_IRUGO);
127
128bool __read_mostly kvm_has_tsc_control;
129EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
130u32 __read_mostly kvm_max_guest_tsc_khz;
131EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
132u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
133EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
134u64 __read_mostly kvm_max_tsc_scaling_ratio;
135EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
136u64 __read_mostly kvm_default_tsc_scaling_ratio;
137EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
138
139
140static u32 __read_mostly tsc_tolerance_ppm = 250;
141module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

/*
 * Local APIC timer advance (TSC-deadline mode only), in nanoseconds.
 * -1 enables adaptive tuning of the advancement, 0 disables it.
 */
149static int __read_mostly lapic_timer_advance_ns = -1;
150module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
151
152static bool __read_mostly vector_hashing = true;
153module_param(vector_hashing, bool, S_IRUGO);
154
155bool __read_mostly enable_vmware_backdoor = false;
156module_param(enable_vmware_backdoor, bool, S_IRUGO);
157EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
158
159static bool __read_mostly force_emulation_prefix = false;
160module_param(force_emulation_prefix, bool, S_IRUGO);
161
162int __read_mostly pi_inject_timer = -1;
163module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);

/*
 * MSRs that are only consumed in user mode (e.g. the SYSENTER/SYSCALL MSRs)
 * have their host values restored lazily, on the first return to userspace
 * after a vCPU has run on this CPU.
 */
170#define KVM_MAX_NR_USER_RETURN_MSRS 16
171
172struct kvm_user_return_msrs_global {
173 int nr;
174 u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
175};
176
177struct kvm_user_return_msrs {
178 struct user_return_notifier urn;
179 bool registered;
180 struct kvm_user_return_msr_values {
181 u64 host;
182 u64 curr;
183 } values[KVM_MAX_NR_USER_RETURN_MSRS];
184};
185
186static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
187static struct kvm_user_return_msrs __percpu *user_return_msrs;
188
189#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
190 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
191 | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
192 | XFEATURE_MASK_PKRU)
193
194u64 __read_mostly host_efer;
195EXPORT_SYMBOL_GPL(host_efer);
196
bool __read_mostly allow_smaller_maxphyaddr = false;
198EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
199
200static u64 __read_mostly host_xss;
201u64 __read_mostly supported_xss;
202EXPORT_SYMBOL_GPL(supported_xss);
203
204struct kvm_stats_debugfs_item debugfs_entries[] = {
205 VCPU_STAT("pf_fixed", pf_fixed),
206 VCPU_STAT("pf_guest", pf_guest),
207 VCPU_STAT("tlb_flush", tlb_flush),
208 VCPU_STAT("invlpg", invlpg),
209 VCPU_STAT("exits", exits),
210 VCPU_STAT("io_exits", io_exits),
211 VCPU_STAT("mmio_exits", mmio_exits),
212 VCPU_STAT("signal_exits", signal_exits),
213 VCPU_STAT("irq_window", irq_window_exits),
214 VCPU_STAT("nmi_window", nmi_window_exits),
215 VCPU_STAT("halt_exits", halt_exits),
216 VCPU_STAT("halt_successful_poll", halt_successful_poll),
217 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
218 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
219 VCPU_STAT("halt_wakeup", halt_wakeup),
220 VCPU_STAT("hypercalls", hypercalls),
221 VCPU_STAT("request_irq", request_irq_exits),
222 VCPU_STAT("irq_exits", irq_exits),
223 VCPU_STAT("host_state_reload", host_state_reload),
224 VCPU_STAT("fpu_reload", fpu_reload),
225 VCPU_STAT("insn_emulation", insn_emulation),
226 VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
227 VCPU_STAT("irq_injections", irq_injections),
228 VCPU_STAT("nmi_injections", nmi_injections),
229 VCPU_STAT("req_event", req_event),
230 VCPU_STAT("l1d_flush", l1d_flush),
231 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
232 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
233 VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
234 VM_STAT("mmu_pte_write", mmu_pte_write),
235 VM_STAT("mmu_pte_updated", mmu_pte_updated),
236 VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
237 VM_STAT("mmu_flooded", mmu_flooded),
238 VM_STAT("mmu_recycled", mmu_recycled),
239 VM_STAT("mmu_cache_miss", mmu_cache_miss),
240 VM_STAT("mmu_unsync", mmu_unsync),
241 VM_STAT("remote_tlb_flush", remote_tlb_flush),
242 VM_STAT("largepages", lpages, .mode = 0444),
243 VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
244 VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
245 { NULL }
246};
247
248u64 __read_mostly host_xcr0;
249u64 __read_mostly supported_xcr0;
250EXPORT_SYMBOL_GPL(supported_xcr0);
251
252static struct kmem_cache *x86_fpu_cache;
253
254static struct kmem_cache *x86_emulator_cache;
255

/*
 * Called when a guest MSR access hit an invalid/unhandled MSR.  Return true
 * if the access should be silently ignored (optionally with a report) rather
 * than failed.
 */
260static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
261 u64 data, bool write)
262{
263 const char *op = write ? "wrmsr" : "rdmsr";
264
265 if (ignore_msrs) {
266 if (report_ignored_msrs)
267 kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n",
268 op, msr, data);
269
270 return true;
271 } else {
272 kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n",
273 op, msr, data);
274 return false;
275 }
276}
277
278static struct kmem_cache *kvm_alloc_emulator_cache(void)
279{
280 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
281 unsigned int size = sizeof(struct x86_emulate_ctxt);
282
283 return kmem_cache_create_usercopy("x86_emulator", size,
284 __alignof__(struct x86_emulate_ctxt),
285 SLAB_ACCOUNT, useroffset,
286 size - useroffset, NULL);
287}
288
289static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
290
291static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
292{
293 int i;
294 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
295 vcpu->arch.apf.gfns[i] = ~0;
296}
297
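/*
 * Restore the host values of any user-return MSRs that were modified on this
 * CPU.  Runs from the user-return notifier on the next return to userspace,
 * and directly via drop_user_return_notifiers().
 */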
298static void kvm_on_user_return(struct user_return_notifier *urn)
299{
300 unsigned slot;
301 struct kvm_user_return_msrs *msrs
302 = container_of(urn, struct kvm_user_return_msrs, urn);
303 struct kvm_user_return_msr_values *values;
304 unsigned long flags;
305
	/*
	 * Disable interrupts while unregistering: this function can also be
	 * reached via drop_user_return_notifiers() when the CPU is being
	 * disabled for KVM use, and must not race with itself.
	 */
310 local_irq_save(flags);
311 if (msrs->registered) {
312 msrs->registered = false;
313 user_return_notifier_unregister(urn);
314 }
315 local_irq_restore(flags);
316 for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
317 values = &msrs->values[slot];
318 if (values->host != values->curr) {
319 wrmsrl(user_return_msrs_global.msrs[slot], values->host);
320 values->curr = values->host;
321 }
322 }
323}
324
325void kvm_define_user_return_msr(unsigned slot, u32 msr)
326{
327 BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
328 user_return_msrs_global.msrs[slot] = msr;
329 if (slot >= user_return_msrs_global.nr)
330 user_return_msrs_global.nr = slot + 1;
331}
332EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
333
334static void kvm_user_return_msr_cpu_online(void)
335{
336 unsigned int cpu = smp_processor_id();
337 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
338 u64 value;
339 int i;
340
341 for (i = 0; i < user_return_msrs_global.nr; ++i) {
342 rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
343 msrs->values[i].host = value;
344 msrs->values[i].curr = value;
345 }
346}
347
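/*
 * Write a guest value into a user-return MSR slot on the current CPU and
 * register the user-return notifier so the host value is restored before
 * returning to userspace.  Bits outside @mask keep their host value.
 */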
348int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
349{
350 unsigned int cpu = smp_processor_id();
351 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
352 int err;
353
354 value = (value & mask) | (msrs->values[slot].host & ~mask);
355 if (value == msrs->values[slot].curr)
356 return 0;
357 err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
358 if (err)
359 return 1;
360
361 msrs->values[slot].curr = value;
362 if (!msrs->registered) {
363 msrs->urn.on_user_return = kvm_on_user_return;
364 user_return_notifier_register(&msrs->urn);
365 msrs->registered = true;
366 }
367 return 0;
368}
369EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
370
371static void drop_user_return_notifiers(void)
372{
373 unsigned int cpu = smp_processor_id();
374 struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
375
376 if (msrs->registered)
377 kvm_on_user_return(&msrs->urn);
378}
379
380u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
381{
382 return vcpu->arch.apic_base;
383}
384EXPORT_SYMBOL_GPL(kvm_get_apic_base);
385
386enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
387{
388 return kvm_apic_mode(kvm_get_apic_base(vcpu));
389}
390EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
391
392int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
393{
394 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
395 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
396 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
397 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
398
399 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
400 return 1;
401 if (!msr_info->host_initiated) {
402 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
403 return 1;
404 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
405 return 1;
406 }
407
408 kvm_lapic_set_base(vcpu, msr_info->data);
409 kvm_recalculate_apic_map(vcpu->kvm);
410 return 0;
411}
412EXPORT_SYMBOL_GPL(kvm_set_apic_base);
413
414asmlinkage __visible noinstr void kvm_spurious_fault(void)
415{
	/* A fault here is only tolerated while the host is rebooting. */
417 BUG_ON(!kvm_rebooting);
418}
419EXPORT_SYMBOL_GPL(kvm_spurious_fault);
420
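/*
 * Exception classes used to decide whether two back-to-back exceptions merge
 * into a double fault (or a triple fault), following the contributory
 * exception rules of the SDM.
 */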
421#define EXCPT_BENIGN 0
422#define EXCPT_CONTRIBUTORY 1
423#define EXCPT_PF 2
424
425static int exception_class(int vector)
426{
427 switch (vector) {
428 case PF_VECTOR:
429 return EXCPT_PF;
430 case DE_VECTOR:
431 case TS_VECTOR:
432 case NP_VECTOR:
433 case SS_VECTOR:
434 case GP_VECTOR:
435 return EXCPT_CONTRIBUTORY;
436 default:
437 break;
438 }
439 return EXCPT_BENIGN;
440}
441
442#define EXCPT_FAULT 0
443#define EXCPT_TRAP 1
444#define EXCPT_ABORT 2
445#define EXCPT_INTERRUPT 3
446
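/* Classify a vector as a fault, trap or abort for injection purposes. */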
447static int exception_type(int vector)
448{
449 unsigned int mask;
450
451 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
452 return EXCPT_INTERRUPT;
453
454 mask = 1 << vector;
455
	/* #DB, #BP and #OF are reported as traps. */
457 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
458 return EXCPT_TRAP;
459
460 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
461 return EXCPT_ABORT;

	/* Everything else, including reserved vectors, is treated as a fault. */
464 return EXCPT_FAULT;
465}
466
467void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
468{
469 unsigned nr = vcpu->arch.exception.nr;
470 bool has_payload = vcpu->arch.exception.has_payload;
471 unsigned long payload = vcpu->arch.exception.payload;
472
473 if (!has_payload)
474 return;
475
476 switch (nr) {
477 case DB_VECTOR:
		/*
		 * The CPU only ever clears bits 0-3 of DR6 on a #DB; the rest
		 * of DR6 is sticky, so start by clearing just the trap bits.
		 */
		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
		/* DR6.RTM is set by every #DB that does not explicitly clear it. */
		vcpu->arch.dr6 |= DR6_RTM;
		vcpu->arch.dr6 |= payload;
		/*
		 * The payload uses bit 16 to request clearing DR6.RTM, which
		 * keeps it compatible with the VMX "pending debug exceptions"
		 * encoding; fold that request into DR6 here.
		 */
		vcpu->arch.dr6 ^= payload & DR6_RTM;
		/*
		 * Bit 12 (enabled breakpoint) exists in the pending debug
		 * exceptions format but is reserved in DR6, so it must not
		 * leak into the architectural register.
		 */
		vcpu->arch.dr6 &= ~BIT(12);
506 break;
507 case PF_VECTOR:
508 vcpu->arch.cr2 = payload;
509 break;
510 }
511
512 vcpu->arch.exception.has_payload = false;
513 vcpu->arch.exception.payload = 0;
514}
515EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
516
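/*
 * Queue an exception for injection, merging it with any exception that is
 * already pending or injected: contributory pairs are promoted to #DF, a
 * fault on top of #DF becomes a triple fault, and otherwise the new
 * exception simply replaces the old one.
 */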
517static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
518 unsigned nr, bool has_error, u32 error_code,
519 bool has_payload, unsigned long payload, bool reinject)
520{
521 u32 prev_nr;
522 int class1, class2;
523
524 kvm_make_request(KVM_REQ_EVENT, vcpu);
525
526 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
527 queue:
528 if (has_error && !is_protmode(vcpu))
529 has_error = false;
530 if (reinject) {
			/*
			 * A reinjected exception should never still be
			 * pending: on VM-entry a pending exception only
			 * survives when injection was blocked by
			 * nested_run_pending, and in that case an immediate
			 * exit is requested before the guest can run far
			 * enough to need reinjection.
			 */
539 WARN_ON_ONCE(vcpu->arch.exception.pending);
540 vcpu->arch.exception.injected = true;
541 if (WARN_ON_ONCE(has_payload)) {
			/* A reinjected exception has already delivered its payload. */
546 has_payload = false;
547 payload = 0;
548 }
549 } else {
550 vcpu->arch.exception.pending = true;
551 vcpu->arch.exception.injected = false;
552 }
553 vcpu->arch.exception.has_error_code = has_error;
554 vcpu->arch.exception.nr = nr;
555 vcpu->arch.exception.error_code = error_code;
556 vcpu->arch.exception.has_payload = has_payload;
557 vcpu->arch.exception.payload = payload;
558 if (!is_guest_mode(vcpu))
559 kvm_deliver_exception_payload(vcpu);
560 return;
561 }
562
563
564 prev_nr = vcpu->arch.exception.nr;
565 if (prev_nr == DF_VECTOR) {
		/* A fault while delivering #DF escalates to a triple fault (shutdown). */
567 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
568 return;
569 }
570 class1 = exception_class(prev_nr);
571 class2 = exception_class(nr);
572 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
573 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/*
		 * Promote to a double fault, per the contributory-exception
		 * rules in the SDM.  Mark it pending rather than injected so
		 * the #DF itself can still trigger a nested VM-exit.
		 */
579 vcpu->arch.exception.pending = true;
580 vcpu->arch.exception.injected = false;
581 vcpu->arch.exception.has_error_code = true;
582 vcpu->arch.exception.nr = DF_VECTOR;
583 vcpu->arch.exception.error_code = 0;
584 vcpu->arch.exception.has_payload = false;
585 vcpu->arch.exception.payload = 0;
586 } else
		/*
		 * Replace the previous exception with the new one, in the
		 * hope that re-executing the instruction will regenerate the
		 * lost exception.
		 */
590 goto queue;
591}
592
593void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
594{
595 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
596}
597EXPORT_SYMBOL_GPL(kvm_queue_exception);
598
599void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
600{
601 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
602}
603EXPORT_SYMBOL_GPL(kvm_requeue_exception);
604
605void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
606 unsigned long payload)
607{
608 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
609}
610EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
611
612static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
613 u32 error_code, unsigned long payload)
614{
615 kvm_multiple_exception(vcpu, nr, true, error_code,
616 true, payload, false);
617}
618
619int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
620{
621 if (err)
622 kvm_inject_gp(vcpu, 0);
623 else
624 return kvm_skip_emulated_instruction(vcpu);
625
626 return 1;
627}
628EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
629
630void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
631{
632 ++vcpu->stat.pf_guest;
633 vcpu->arch.exception.nested_apf =
634 is_guest_mode(vcpu) && fault->async_page_fault;
635 if (vcpu->arch.exception.nested_apf) {
636 vcpu->arch.apf.nested_apf_token = fault->address;
637 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
638 } else {
639 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
640 fault->address);
641 }
642}
643EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
644
645bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
646 struct x86_exception *fault)
647{
648 struct kvm_mmu *fault_mmu;
649 WARN_ON_ONCE(fault->vector != PF_VECTOR);
650
651 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
652 vcpu->arch.walk_mmu;
653
	/*
	 * Invalidate any stale TLB entry for the faulting address, mirroring
	 * hardware behaviour; otherwise the access could fault indefinitely.
	 */
658 if ((fault->error_code & PFERR_PRESENT_MASK) &&
659 !(fault->error_code & PFERR_RSVD_MASK))
660 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
661 fault_mmu->root_hpa);
662
663 fault_mmu->inject_page_fault(vcpu, fault);
664 return fault->nested_page_fault;
665}
666EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
667
668void kvm_inject_nmi(struct kvm_vcpu *vcpu)
669{
670 atomic_inc(&vcpu->arch.nmi_queued);
671 kvm_make_request(KVM_REQ_NMI, vcpu);
672}
673EXPORT_SYMBOL_GPL(kvm_inject_nmi);
674
675void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
676{
677 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
678}
679EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
680
681void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
682{
683 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
684}
685EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
686
/*
 * Return true if the vCPU's CPL is at most @required_cpl; otherwise queue a
 * #GP(0) and return false.
 */
691bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
692{
693 if (kvm_x86_ops.get_cpl(vcpu) <= required_cpl)
694 return true;
695 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
696 return false;
697}
698EXPORT_SYMBOL_GPL(kvm_require_cpl);
699
700bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
701{
702 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
703 return true;
704
705 kvm_queue_exception(vcpu, UD_VECTOR);
706 return false;
707}
708EXPORT_SYMBOL_GPL(kvm_require_dr);
709
/*
 * Read guest memory through a specific MMU context.  Unlike
 * kvm_vcpu_read_guest_page(), the gfn is translated through @mmu first, so
 * this can read either guest-physical or nested-guest-physical memory.
 */
715int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
716 gfn_t ngfn, void *data, int offset, int len,
717 u32 access)
718{
719 struct x86_exception exception;
720 gfn_t real_gfn;
721 gpa_t ngpa;
722
723 ngpa = gfn_to_gpa(ngfn);
724 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
725 if (real_gfn == UNMAPPED_GVA)
726 return -EFAULT;
727
728 real_gfn = gpa_to_gfn(real_gfn);
729
730 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
731}
732EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
733
734static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
735 void *data, int offset, int len, u32 access)
736{
737 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
738 data, offset, len, access);
739}
740
741static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
742{
743 return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
744 rsvd_bits(1, 2);
745}
746
/*
 * Load the four PAE PDPTEs referenced by cr3.  Returns 1 if they are all
 * valid, 0 otherwise.
 */
750int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
751{
752 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
753 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
754 int i;
755 int ret;
756 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
757
758 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
759 offset * sizeof(u64), sizeof(pdpte),
760 PFERR_USER_MASK|PFERR_WRITE_MASK);
761 if (ret < 0) {
762 ret = 0;
763 goto out;
764 }
765 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
766 if ((pdpte[i] & PT_PRESENT_MASK) &&
767 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
768 ret = 0;
769 goto out;
770 }
771 }
772 ret = 1;
773
774 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
775 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
776
777out:
778
779 return ret;
780}
781EXPORT_SYMBOL_GPL(load_pdptrs);
782
783bool pdptrs_changed(struct kvm_vcpu *vcpu)
784{
785 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
786 int offset;
787 gfn_t gfn;
788 int r;
789
790 if (!is_pae_paging(vcpu))
791 return false;
792
793 if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
794 return true;
795
796 gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
797 offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
798 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
799 PFERR_USER_MASK | PFERR_WRITE_MASK);
800 if (r < 0)
801 return true;
802
803 return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
804}
805EXPORT_SYMBOL_GPL(pdptrs_changed);
806
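/*
 * Validate and load a new CR0 value, reloading the PDPTEs when needed and
 * resetting the MMU context if paging-related bits change.  Returns 0 on
 * success, 1 if the value must be rejected (the caller injects #GP).
 */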
807int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
808{
809 unsigned long old_cr0 = kvm_read_cr0(vcpu);
810 unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
811 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
812
813 cr0 |= X86_CR0_ET;
814
815#ifdef CONFIG_X86_64
816 if (cr0 & 0xffffffff00000000UL)
817 return 1;
818#endif
819
820 cr0 &= ~CR0_RESERVED_BITS;
821
822 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
823 return 1;
824
825 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
826 return 1;
827
828#ifdef CONFIG_X86_64
829 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
830 (cr0 & X86_CR0_PG)) {
831 int cs_db, cs_l;
832
833 if (!is_pae(vcpu))
834 return 1;
835 kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
836 if (cs_l)
837 return 1;
838 }
839#endif
840 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
841 is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
842 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
843 return 1;
844
845 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
846 return 1;
847
848 kvm_x86_ops.set_cr0(vcpu, cr0);
849
850 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
851 kvm_clear_async_pf_completion_queue(vcpu);
852 kvm_async_pf_hash_reset(vcpu);
853 }
854
855 if ((cr0 ^ old_cr0) & update_bits)
856 kvm_mmu_reset_context(vcpu);
857
858 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
859 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
860 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
861 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
862
863 return 0;
864}
865EXPORT_SYMBOL_GPL(kvm_set_cr0);
866
867void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
868{
869 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
870}
871EXPORT_SYMBOL_GPL(kvm_lmsw);
872
873void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
874{
875 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
876
877 if (vcpu->arch.xcr0 != host_xcr0)
878 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
879
880 if (vcpu->arch.xsaves_enabled &&
881 vcpu->arch.ia32_xss != host_xss)
882 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
883 }
884
885 if (static_cpu_has(X86_FEATURE_PKU) &&
886 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
887 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
888 vcpu->arch.pkru != vcpu->arch.host_pkru)
889 __write_pkru(vcpu->arch.pkru);
890}
891EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
892
893void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
894{
895 if (static_cpu_has(X86_FEATURE_PKU) &&
896 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
897 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
898 vcpu->arch.pkru = rdpkru();
899 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
900 __write_pkru(vcpu->arch.host_pkru);
901 }
902
903 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
904
905 if (vcpu->arch.xcr0 != host_xcr0)
906 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
907
908 if (vcpu->arch.xsaves_enabled &&
909 vcpu->arch.ia32_xss != host_xss)
910 wrmsrl(MSR_IA32_XSS, host_xss);
911 }
912
913}
914EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
915
916static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
917{
918 u64 xcr0 = xcr;
919 u64 old_xcr0 = vcpu->arch.xcr0;
920 u64 valid_bits;
921
922
923 if (index != XCR_XFEATURE_ENABLED_MASK)
924 return 1;
925 if (!(xcr0 & XFEATURE_MASK_FP))
926 return 1;
927 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
928 return 1;
929
	/*
	 * Don't let the guest enable xfeatures that KVM cannot save/restore.
	 * FP (bit 0) is always permitted: XCR0[0] is architecturally fixed
	 * to 1, whether or not the emulated CPU advertises XSAVE.
	 */
935 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
936 if (xcr0 & ~valid_bits)
937 return 1;
938
939 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
940 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
941 return 1;
942
943 if (xcr0 & XFEATURE_MASK_AVX512) {
944 if (!(xcr0 & XFEATURE_MASK_YMM))
945 return 1;
946 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
947 return 1;
948 }
949 vcpu->arch.xcr0 = xcr0;
950
951 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
952 kvm_update_cpuid_runtime(vcpu);
953 return 0;
954}
955
956int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
957{
958 if (kvm_x86_ops.get_cpl(vcpu) != 0 ||
959 __kvm_set_xcr(vcpu, index, xcr)) {
960 kvm_inject_gp(vcpu, 0);
961 return 1;
962 }
963 return 0;
964}
965EXPORT_SYMBOL_GPL(kvm_set_xcr);
966
967int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
968{
969 if (cr4 & cr4_reserved_bits)
970 return -EINVAL;
971
972 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
973 return -EINVAL;
974
975 return 0;
976}
977EXPORT_SYMBOL_GPL(kvm_valid_cr4);
978
979int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
980{
981 unsigned long old_cr4 = kvm_read_cr4(vcpu);
982 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
983 X86_CR4_SMEP;
984 unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
985
986 if (kvm_valid_cr4(vcpu, cr4))
987 return 1;
988
989 if (is_long_mode(vcpu)) {
990 if (!(cr4 & X86_CR4_PAE))
991 return 1;
992 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
993 return 1;
994 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
995 && ((cr4 ^ old_cr4) & pdptr_bits)
996 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
997 kvm_read_cr3(vcpu)))
998 return 1;
999
1000 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
1001 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
1002 return 1;
1003
		/* PCIDE can only be set while CR3[11:0] == 0 and in long mode. */
1005 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1006 return 1;
1007 }
1008
1009 if (kvm_x86_ops.set_cr4(vcpu, cr4))
1010 return 1;
1011
1012 if (((cr4 ^ old_cr4) & mmu_role_bits) ||
1013 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
1014 kvm_mmu_reset_context(vcpu);
1015
1016 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
1017 kvm_update_cpuid_runtime(vcpu);
1018
1019 return 0;
1020}
1021EXPORT_SYMBOL_GPL(kvm_set_cr4);
1022
1023int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1024{
1025 bool skip_tlb_flush = false;
1026#ifdef CONFIG_X86_64
1027 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1028
1029 if (pcid_enabled) {
1030 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1031 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1032 }
1033#endif
1034
1035 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
1036 if (!skip_tlb_flush) {
1037 kvm_mmu_sync_roots(vcpu);
1038 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1039 }
1040 return 0;
1041 }
1042
1043 if (is_long_mode(vcpu) &&
1044 (cr3 & vcpu->arch.cr3_lm_rsvd_bits))
1045 return 1;
1046 else if (is_pae_paging(vcpu) &&
1047 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
1048 return 1;
1049
1050 kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
1051 vcpu->arch.cr3 = cr3;
1052 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1053
1054 return 0;
1055}
1056EXPORT_SYMBOL_GPL(kvm_set_cr3);
1057
1058int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1059{
1060 if (cr8 & CR8_RESERVED_BITS)
1061 return 1;
1062 if (lapic_in_kernel(vcpu))
1063 kvm_lapic_set_tpr(vcpu, cr8);
1064 else
1065 vcpu->arch.cr8 = cr8;
1066 return 0;
1067}
1068EXPORT_SYMBOL_GPL(kvm_set_cr8);
1069
1070unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1071{
1072 if (lapic_in_kernel(vcpu))
1073 return kvm_lapic_get_cr8(vcpu);
1074 else
1075 return vcpu->arch.cr8;
1076}
1077EXPORT_SYMBOL_GPL(kvm_get_cr8);
1078
1079static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1080{
1081 int i;
1082
1083 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1084 for (i = 0; i < KVM_NR_DB_REGS; i++)
1085 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1086 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
1087 }
1088}
1089
1090void kvm_update_dr7(struct kvm_vcpu *vcpu)
1091{
1092 unsigned long dr7;
1093
1094 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1095 dr7 = vcpu->arch.guest_debug_dr7;
1096 else
1097 dr7 = vcpu->arch.dr7;
1098 kvm_x86_ops.set_dr7(vcpu, dr7);
1099 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1100 if (dr7 & DR7_BP_EN_MASK)
1101 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1102}
1103EXPORT_SYMBOL_GPL(kvm_update_dr7);
1104
1105static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1106{
1107 u64 fixed = DR6_FIXED_1;
1108
1109 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1110 fixed |= DR6_RTM;
1111 return fixed;
1112}
1113
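/*
 * Write a debug register.  DR4 and DR5 alias DR6 and DR7 here; the CR4.DE
 * check that turns accesses to them into #UD is done by kvm_require_dr().
 */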
1114static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1115{
1116 size_t size = ARRAY_SIZE(vcpu->arch.db);
1117
1118 switch (dr) {
1119 case 0 ... 3:
1120 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1121 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1122 vcpu->arch.eff_db[dr] = val;
1123 break;
1124 case 4:
1125 case 6:
1126 if (!kvm_dr6_valid(val))
1127 return -1;
1128 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1129 break;
1130 case 5:
1131 default:
1132 if (!kvm_dr7_valid(val))
1133 return -1;
1134 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1135 kvm_update_dr7(vcpu);
1136 break;
1137 }
1138
1139 return 0;
1140}
1141
1142int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1143{
1144 if (__kvm_set_dr(vcpu, dr, val)) {
1145 kvm_inject_gp(vcpu, 0);
1146 return 1;
1147 }
1148 return 0;
1149}
1150EXPORT_SYMBOL_GPL(kvm_set_dr);
1151
1152int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1153{
1154 size_t size = ARRAY_SIZE(vcpu->arch.db);
1155
1156 switch (dr) {
1157 case 0 ... 3:
1158 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1159 break;
1160 case 4:
1161 case 6:
1162 *val = vcpu->arch.dr6;
1163 break;
1164 case 5:
1165 default:
1166 *val = vcpu->arch.dr7;
1167 break;
1168 }
1169 return 0;
1170}
1171EXPORT_SYMBOL_GPL(kvm_get_dr);
1172
1173bool kvm_rdpmc(struct kvm_vcpu *vcpu)
1174{
1175 u32 ecx = kvm_rcx_read(vcpu);
1176 u64 data;
1177 int err;
1178
1179 err = kvm_pmu_rdpmc(vcpu, ecx, &data);
1180 if (err)
1181 return err;
1182 kvm_rax_write(vcpu, (u32)data);
1183 kvm_rdx_write(vcpu, data >> 32);
1184 return err;
1185}
1186EXPORT_SYMBOL_GPL(kvm_rdpmc);
1187
/*
 * MSR numbers exposed to userspace via KVM_GET_MSRS, KVM_SET_MSRS and
 * KVM_GET_MSR_INDEX_LIST.
 *
 * The const "*_all" lists below are the superset; msrs_to_save,
 * emulated_msrs and msr_based_features are populated from them at setup time
 * to reflect what the host CPU and vendor module actually support.
 * msrs_to_save_all holds MSRs backed by host hardware, while KVM-specific
 * (emulated) MSRs live in emulated_msrs_all.
 */
1200static const u32 msrs_to_save_all[] = {
1201 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1202 MSR_STAR,
1203#ifdef CONFIG_X86_64
1204 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1205#endif
1206 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1207 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1208 MSR_IA32_SPEC_CTRL,
1209 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1210 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1211 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1212 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1213 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1214 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1215 MSR_IA32_UMWAIT_CONTROL,
1216
1217 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1218 MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
1219 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1220 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1221 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1222 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1223 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1224 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1225 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1226 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1227 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1228 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1229 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1230 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1231 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1232 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1233 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1234 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1235 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1236 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1237 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1238 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1239};
1240
1241static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1242static unsigned num_msrs_to_save;
1243
1244static const u32 emulated_msrs_all[] = {
1245 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1246 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1247 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1248 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1249 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1250 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1251 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1252 HV_X64_MSR_RESET,
1253 HV_X64_MSR_VP_INDEX,
1254 HV_X64_MSR_VP_RUNTIME,
1255 HV_X64_MSR_SCONTROL,
1256 HV_X64_MSR_STIMER0_CONFIG,
1257 HV_X64_MSR_VP_ASSIST_PAGE,
1258 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1259 HV_X64_MSR_TSC_EMULATION_STATUS,
1260 HV_X64_MSR_SYNDBG_OPTIONS,
1261 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1262 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1263 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1264
1265 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1266 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1267
1268 MSR_IA32_TSC_ADJUST,
1269 MSR_IA32_TSCDEADLINE,
1270 MSR_IA32_ARCH_CAPABILITIES,
1271 MSR_IA32_PERF_CAPABILITIES,
1272 MSR_IA32_MISC_ENABLE,
1273 MSR_IA32_MCG_STATUS,
1274 MSR_IA32_MCG_CTL,
1275 MSR_IA32_MCG_EXT_CTL,
1276 MSR_IA32_SMBASE,
1277 MSR_SMI_COUNT,
1278 MSR_PLATFORM_INFO,
1279 MSR_MISC_FEATURES_ENABLES,
1280 MSR_AMD64_VIRT_SPEC_CTRL,
1281 MSR_IA32_POWER_CTL,
1282 MSR_IA32_UCODE_REV,

	/*
	 * KVM always supports the "true" VMX control MSRs, even if the host
	 * does not, so they are listed here rather than in msrs_to_save_all.
	 * VMX MSRs whose values are derived from CPUID or from other MSRs by
	 * the nested code are deliberately omitted.
	 */
1291 MSR_IA32_VMX_BASIC,
1292 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1293 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1294 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1295 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1296 MSR_IA32_VMX_MISC,
1297 MSR_IA32_VMX_CR0_FIXED0,
1298 MSR_IA32_VMX_CR4_FIXED0,
1299 MSR_IA32_VMX_VMCS_ENUM,
1300 MSR_IA32_VMX_PROCBASED_CTLS2,
1301 MSR_IA32_VMX_EPT_VPID_CAP,
1302 MSR_IA32_VMX_VMFUNC,
1303
1304 MSR_K7_HWCR,
1305 MSR_KVM_POLL_CONTROL,
1306};
1307
1308static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1309static unsigned num_emulated_msrs;
1310
/*
 * MSR numbers that expose MSR-based CPU features, i.e. values a hypervisor
 * can read to validate requested capabilities.
 */
1315static const u32 msr_based_features_all[] = {
1316 MSR_IA32_VMX_BASIC,
1317 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1318 MSR_IA32_VMX_PINBASED_CTLS,
1319 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1320 MSR_IA32_VMX_PROCBASED_CTLS,
1321 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1322 MSR_IA32_VMX_EXIT_CTLS,
1323 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1324 MSR_IA32_VMX_ENTRY_CTLS,
1325 MSR_IA32_VMX_MISC,
1326 MSR_IA32_VMX_CR0_FIXED0,
1327 MSR_IA32_VMX_CR0_FIXED1,
1328 MSR_IA32_VMX_CR4_FIXED0,
1329 MSR_IA32_VMX_CR4_FIXED1,
1330 MSR_IA32_VMX_VMCS_ENUM,
1331 MSR_IA32_VMX_PROCBASED_CTLS2,
1332 MSR_IA32_VMX_EPT_VPID_CAP,
1333 MSR_IA32_VMX_VMFUNC,
1334
1335 MSR_F10H_DECFG,
1336 MSR_IA32_UCODE_REV,
1337 MSR_IA32_ARCH_CAPABILITIES,
1338 MSR_IA32_PERF_CAPABILITIES,
1339};
1340
1341static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1342static unsigned int num_msr_based_features;
1343
1344static u64 kvm_get_arch_capabilities(void)
1345{
1346 u64 data = 0;
1347
1348 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1349 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1350
1351
1352
1353
1354
1355
1356
1357 data |= ARCH_CAP_PSCHANGE_MC_NO;
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1369 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1370
1371 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1372 data |= ARCH_CAP_RDCL_NO;
1373 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1374 data |= ARCH_CAP_SSB_NO;
1375 if (!boot_cpu_has_bug(X86_BUG_MDS))
1376 data |= ARCH_CAP_MDS_NO;
1377

	/*
	 * If the host has TSX disabled, neither TAA_NO nor TSX_CTRL should be
	 * advertised; otherwise report TAA_NO when the host is not itself
	 * affected by TAA.
	 */
1384 if (!boot_cpu_has(X86_FEATURE_RTM))
1385 data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
1386 else if (!boot_cpu_has_bug(X86_BUG_TAA))
1387 data |= ARCH_CAP_TAA_NO;
1388
1389 return data;
1390}
1391
1392static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1393{
1394 switch (msr->index) {
1395 case MSR_IA32_ARCH_CAPABILITIES:
1396 msr->data = kvm_get_arch_capabilities();
1397 break;
1398 case MSR_IA32_UCODE_REV:
1399 rdmsrl_safe(msr->index, &msr->data);
1400 break;
1401 default:
1402 return kvm_x86_ops.get_msr_feature(msr);
1403 }
1404 return 0;
1405}
1406
1407static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1408{
1409 struct kvm_msr_entry msr;
1410 int r;
1411
1412 msr.index = index;
1413 r = kvm_get_msr_feature(&msr);
1414
1415 if (r == KVM_MSR_RET_INVALID) {
1416
1417 *data = 0;
1418 if (kvm_msr_ignored_check(vcpu, index, 0, false))
1419 r = 0;
1420 }
1421
1422 if (r)
1423 return r;
1424
1425 *data = msr.data;
1426
1427 return 0;
1428}
1429
1430static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1431{
1432 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1433 return false;
1434
1435 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1436 return false;
1437
1438 if (efer & (EFER_LME | EFER_LMA) &&
1439 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1440 return false;
1441
1442 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1443 return false;
1444
1445 return true;
1446
1447}
1448bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1449{
1450 if (efer & efer_reserved_bits)
1451 return false;
1452
1453 return __kvm_valid_efer(vcpu, efer);
1454}
1455EXPORT_SYMBOL_GPL(kvm_valid_efer);
1456
1457static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1458{
1459 u64 old_efer = vcpu->arch.efer;
1460 u64 efer = msr_info->data;
1461 int r;
1462
1463 if (efer & efer_reserved_bits)
1464 return 1;
1465
1466 if (!msr_info->host_initiated) {
1467 if (!__kvm_valid_efer(vcpu, efer))
1468 return 1;
1469
1470 if (is_paging(vcpu) &&
1471 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1472 return 1;
1473 }
1474
1475 efer &= ~EFER_LMA;
1476 efer |= vcpu->arch.efer & EFER_LMA;
1477
1478 r = kvm_x86_ops.set_efer(vcpu, efer);
1479 if (r) {
1480 WARN_ON(r > 0);
1481 return r;
1482 }
1483
1484
1485 if ((efer ^ old_efer) & EFER_NX)
1486 kvm_mmu_reset_context(vcpu);
1487
1488 return 0;
1489}
1490
1491void kvm_enable_efer_bits(u64 mask)
1492{
1493 efer_reserved_bits &= ~mask;
1494}
1495EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1496
1497bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
1498{
1499 struct kvm *kvm = vcpu->kvm;
1500 struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
1501 u32 count = kvm->arch.msr_filter.count;
1502 u32 i;
1503 bool r = kvm->arch.msr_filter.default_allow;
1504 int idx;
1505
	/* Allow everything if no filter is installed; x2APIC MSRs are never filtered. */
1507 if (!count || (index >= 0x800 && index <= 0x8ff))
1508 return true;
1509
	/* Guard against a concurrent update of the filter ranges. */
1511 idx = srcu_read_lock(&kvm->srcu);
1512
1513 for (i = 0; i < count; i++) {
1514 u32 start = ranges[i].base;
1515 u32 end = start + ranges[i].nmsrs;
1516 u32 flags = ranges[i].flags;
1517 unsigned long *bitmap = ranges[i].bitmap;
1518
1519 if ((index >= start) && (index < end) && (flags & type)) {
1520 r = !!test_bit(index - start, bitmap);
1521 break;
1522 }
1523 }
1524
1525 srcu_read_unlock(&kvm->srcu, idx);
1526
1527 return r;
1528}
1529EXPORT_SYMBOL_GPL(kvm_msr_allowed);
1530
/*
 * Write @data to the MSR specified by @index.  MSR-specific sanity checks
 * are bypassed when @host_initiated is true.  Returns 0 on success, non-zero
 * otherwise; assumes vcpu_load() has already been called.
 */
1537static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1538 bool host_initiated)
1539{
1540 struct msr_data msr;
1541
1542 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE))
1543 return KVM_MSR_RET_FILTERED;
1544
1545 switch (index) {
1546 case MSR_FS_BASE:
1547 case MSR_GS_BASE:
1548 case MSR_KERNEL_GS_BASE:
1549 case MSR_CSTAR:
1550 case MSR_LSTAR:
1551 if (is_noncanonical_address(data, vcpu))
1552 return 1;
1553 break;
1554 case MSR_IA32_SYSENTER_EIP:
1555 case MSR_IA32_SYSENTER_ESP:
		/*
		 * Writing a non-canonical value to these MSRs raises #GP on
		 * Intel but not on AMD (which ignores the upper 32 bits).
		 * Force the value to be canonical so a later VM-entry cannot
		 * fail on Intel and behaviour stays deterministic if the
		 * guest executes 64-bit SYSENTER.
		 */
1568 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
1569 }
1570
1571 msr.data = data;
1572 msr.index = index;
1573 msr.host_initiated = host_initiated;
1574
1575 return kvm_x86_ops.set_msr(vcpu, &msr);
1576}
1577
1578static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1579 u32 index, u64 data, bool host_initiated)
1580{
1581 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1582
1583 if (ret == KVM_MSR_RET_INVALID)
1584 if (kvm_msr_ignored_check(vcpu, index, data, true))
1585 ret = 0;
1586
1587 return ret;
1588}
1589
/*
 * Read the MSR specified by @index into @data.  MSR-specific sanity checks
 * are bypassed when @host_initiated is true.  Returns 0 on success, non-zero
 * otherwise; assumes vcpu_load() has already been called.
 */
1596int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1597 bool host_initiated)
1598{
1599 struct msr_data msr;
1600 int ret;
1601
1602 if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
1603 return KVM_MSR_RET_FILTERED;
1604
1605 msr.index = index;
1606 msr.host_initiated = host_initiated;
1607
1608 ret = kvm_x86_ops.get_msr(vcpu, &msr);
1609 if (!ret)
1610 *data = msr.data;
1611 return ret;
1612}
1613
1614static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1615 u32 index, u64 *data, bool host_initiated)
1616{
1617 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1618
1619 if (ret == KVM_MSR_RET_INVALID) {
1620
1621 *data = 0;
1622 if (kvm_msr_ignored_check(vcpu, index, 0, false))
1623 ret = 0;
1624 }
1625
1626 return ret;
1627}
1628
1629int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1630{
1631 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1632}
1633EXPORT_SYMBOL_GPL(kvm_get_msr);
1634
1635int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1636{
1637 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1638}
1639EXPORT_SYMBOL_GPL(kvm_set_msr);
1640
1641static int complete_emulated_msr(struct kvm_vcpu *vcpu, bool is_read)
1642{
1643 if (vcpu->run->msr.error) {
1644 kvm_inject_gp(vcpu, 0);
1645 return 1;
1646 } else if (is_read) {
1647 kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
1648 kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
1649 }
1650
1651 return kvm_skip_emulated_instruction(vcpu);
1652}
1653
1654static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
1655{
1656 return complete_emulated_msr(vcpu, true);
1657}
1658
1659static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
1660{
1661 return complete_emulated_msr(vcpu, false);
1662}
1663
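/* Map an internal MSR access failure code to a userspace exit reason. */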
1664static u64 kvm_msr_reason(int r)
1665{
1666 switch (r) {
1667 case KVM_MSR_RET_INVALID:
1668 return KVM_MSR_EXIT_REASON_UNKNOWN;
1669 case KVM_MSR_RET_FILTERED:
1670 return KVM_MSR_EXIT_REASON_FILTER;
1671 default:
1672 return KVM_MSR_EXIT_REASON_INVAL;
1673 }
1674}
1675
1676static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
1677 u32 exit_reason, u64 data,
1678 int (*completion)(struct kvm_vcpu *vcpu),
1679 int r)
1680{
1681 u64 msr_reason = kvm_msr_reason(r);
1682
1683
1684 if (!(vcpu->kvm->arch.user_space_msr_mask & msr_reason))
1685 return 0;
1686
1687 vcpu->run->exit_reason = exit_reason;
1688 vcpu->run->msr.error = 0;
1689 memset(vcpu->run->msr.pad, 0, sizeof(vcpu->run->msr.pad));
1690 vcpu->run->msr.reason = msr_reason;
1691 vcpu->run->msr.index = index;
1692 vcpu->run->msr.data = data;
1693 vcpu->arch.complete_userspace_io = completion;
1694
1695 return 1;
1696}
1697
1698static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
1699{
1700 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
1701 complete_emulated_rdmsr, r);
1702}
1703
1704static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
1705{
1706 return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
1707 complete_emulated_wrmsr, r);
1708}
1709
1710int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1711{
1712 u32 ecx = kvm_rcx_read(vcpu);
1713 u64 data;
1714 int r;
1715
1716 r = kvm_get_msr(vcpu, ecx, &data);
1717
1718
1719 if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
1720
1721 return 0;
1722 }
1723
1724
1725 if (r) {
1726 trace_kvm_msr_read_ex(ecx);
1727 kvm_inject_gp(vcpu, 0);
1728 return 1;
1729 }
1730
1731 trace_kvm_msr_read(ecx, data);
1732
1733 kvm_rax_write(vcpu, data & -1u);
1734 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1735 return kvm_skip_emulated_instruction(vcpu);
1736}
1737EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1738
1739int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1740{
1741 u32 ecx = kvm_rcx_read(vcpu);
1742 u64 data = kvm_read_edx_eax(vcpu);
1743 int r;
1744
1745 r = kvm_set_msr(vcpu, ecx, data);
1746
1747
1748 if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
1749
1750 return 0;
1751
1752
1753 if (r < 0)
1754 return r;
1755
1756
1757 if (r > 0) {
1758 trace_kvm_msr_write_ex(ecx, data);
1759 kvm_inject_gp(vcpu, 0);
1760 return 1;
1761 }
1762
1763 trace_kvm_msr_write(ecx, data);
1764 return kvm_skip_emulated_instruction(vcpu);
1765}
1766EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1767
1768bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
1769{
1770 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
1771 xfer_to_guest_mode_work_pending();
1772}
1773EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
1774
/*
 * Fast path for performance-critical WRMSR emulation, notably IPIs sent via
 * the x2APIC ICR.  It runs early in the VM-exit path, with host interrupts
 * still disabled, to avoid the full cost of a guest/host transition.
 */
1782static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
1783{
1784 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
1785 return 1;
1786
1787 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
1788 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
1789 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
1790 ((u32)(data >> 32) != X2APIC_BROADCAST)) {
1791
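		/* Clear the delivery-status (busy) bit before sending and recording the ICR. */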
1792 data &= ~(1 << 12);
1793 kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
1794 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
1795 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
1796 trace_kvm_apic_write(APIC_ICR, (u32)data);
1797 return 0;
1798 }
1799
1800 return 1;
1801}
1802
1803static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
1804{
1805 if (!kvm_can_use_hv_timer(vcpu))
1806 return 1;
1807
1808 kvm_set_lapic_tscdeadline_msr(vcpu, data);
1809 return 0;
1810}
1811
1812fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
1813{
1814 u32 msr = kvm_rcx_read(vcpu);
1815 u64 data;
1816 fastpath_t ret = EXIT_FASTPATH_NONE;
1817
1818 switch (msr) {
1819 case APIC_BASE_MSR + (APIC_ICR >> 4):
1820 data = kvm_read_edx_eax(vcpu);
1821 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
1822 kvm_skip_emulated_instruction(vcpu);
1823 ret = EXIT_FASTPATH_EXIT_HANDLED;
1824 }
1825 break;
1826 case MSR_IA32_TSCDEADLINE:
1827 data = kvm_read_edx_eax(vcpu);
1828 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
1829 kvm_skip_emulated_instruction(vcpu);
1830 ret = EXIT_FASTPATH_REENTER_GUEST;
1831 }
1832 break;
1833 default:
1834 break;
1835 }
1836
1837 if (ret != EXIT_FASTPATH_NONE)
1838 trace_kvm_msr_write(msr, data);
1839
1840 return ret;
1841}
1842EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
1843
1844
1845
1846
1847static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1848{
1849 return kvm_get_msr_ignored_check(vcpu, index, data, true);
1850}
1851
1852static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1853{
1854 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
1855}
1856
1857#ifdef CONFIG_X86_64
1858struct pvclock_clock {
1859 int vclock_mode;
1860 u64 cycle_last;
1861 u64 mask;
1862 u32 mult;
1863 u32 shift;
1864 u64 base_cycles;
1865 u64 offset;
1866};
1867
1868struct pvclock_gtod_data {
1869 seqcount_t seq;
1870
1871 struct pvclock_clock clock;
1872 struct pvclock_clock raw_clock;
1873
1874 ktime_t offs_boot;
1875 u64 wall_time_sec;
1876};
1877
1878static struct pvclock_gtod_data pvclock_gtod_data;
1879
1880static void update_pvclock_gtod(struct timekeeper *tk)
1881{
1882 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1883
1884 write_seqcount_begin(&vdata->seq);
1885
1886
1887 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
1888 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1889 vdata->clock.mask = tk->tkr_mono.mask;
1890 vdata->clock.mult = tk->tkr_mono.mult;
1891 vdata->clock.shift = tk->tkr_mono.shift;
1892 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
1893 vdata->clock.offset = tk->tkr_mono.base;
1894
1895 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
1896 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
1897 vdata->raw_clock.mask = tk->tkr_raw.mask;
1898 vdata->raw_clock.mult = tk->tkr_raw.mult;
1899 vdata->raw_clock.shift = tk->tkr_raw.shift;
1900 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
1901 vdata->raw_clock.offset = tk->tkr_raw.base;
1902
1903 vdata->wall_time_sec = tk->xtime_sec;
1904
1905 vdata->offs_boot = tk->offs_boot;
1906
1907 write_seqcount_end(&vdata->seq);
1908}
1909
1910static s64 get_kvmclock_base_ns(void)
1911{
	/* Count nanoseconds since boot, but at the rate of the raw monotonic clock. */
1913 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
1914}
1915#else
1916static s64 get_kvmclock_base_ns(void)
1917{
	/* No master clock here; CLOCK_BOOTTIME is sufficient. */
1919 return ktime_get_boottime_ns();
1920}
1921#endif
1922
1923static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1924{
1925 int version;
1926 int r;
1927 struct pvclock_wall_clock wc;
1928 u64 wall_nsec;
1929
1930 kvm->arch.wall_clock = wall_clock;
1931
1932 if (!wall_clock)
1933 return;
1934
1935 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
1936 if (r)
1937 return;
1938
1939 if (version & 1)
1940 ++version;
1941
1942 ++version;
1943
1944 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
1945 return;
1946
	/*
	 * The guest computes wall-clock time by adding the kvmclock system
	 * time to the value written here, so store the current difference
	 * between real time and kvmclock.
	 */
1952 wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
1953
1954 wc.nsec = do_div(wall_nsec, 1000000000);
1955 wc.sec = (u32)wall_nsec;
1956 wc.version = version;
1957
1958 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1959
1960 version++;
1961 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1962}
1963
1964static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
1965 bool old_msr, bool host_initiated)
1966{
1967 struct kvm_arch *ka = &vcpu->kvm->arch;
1968
1969 if (vcpu->vcpu_id == 0 && !host_initiated) {
1970 if (ka->boot_vcpu_runs_old_kvmclock != old_msr)
1971 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1972
1973 ka->boot_vcpu_runs_old_kvmclock = old_msr;
1974 }
1975
1976 vcpu->arch.time = system_time;
1977 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
1978
1979
1980 vcpu->arch.pv_time_enabled = false;
1981 if (!(system_time & 1))
1982 return;
1983
1984 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
1985 &vcpu->arch.pv_time, system_time & ~1ULL,
1986 sizeof(struct pvclock_vcpu_time_info)))
1987 vcpu->arch.pv_time_enabled = true;
1988
1989 return;
1990}
1991
1992static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1993{
1994 do_shl32_div32(dividend, divisor);
1995 return dividend;
1996}
1997
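/*
 * Compute a shift and a 32-bit fixed-point multiplier that convert a counter
 * running at base_hz into one running at scaled_hz, as used by the pvclock
 * scaling parameters.
 */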
1998static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1999 s8 *pshift, u32 *pmultiplier)
2000{
2001 uint64_t scaled64;
2002 int32_t shift = 0;
2003 uint64_t tps64;
2004 uint32_t tps32;
2005
2006 tps64 = base_hz;
2007 scaled64 = scaled_hz;
2008 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
2009 tps64 >>= 1;
2010 shift--;
2011 }
2012
2013 tps32 = (uint32_t)tps64;
2014 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
2015 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
2016 scaled64 >>= 1;
2017 else
2018 tps32 <<= 1;
2019 shift++;
2020 }
2021
2022 *pshift = shift;
2023 *pmultiplier = div_frac(scaled64, tps32);
2024}
2025
2026#ifdef CONFIG_X86_64
2027static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
2028#endif
2029
2030static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
2031static unsigned long max_tsc_khz;
2032
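/* Apply a parts-per-million adjustment to a frequency given in kHz. */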
2033static u32 adjust_tsc_khz(u32 khz, s32 ppm)
2034{
2035 u64 v = (u64)khz * (1000000 + ppm);
2036 do_div(v, 1000000);
2037 return v;
2038}
2039
2040static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
2041{
2042 u64 ratio;
2043
2044
2045 if (!scale) {
2046 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
2047 return 0;
2048 }
2049
2050
2051 if (!kvm_has_tsc_control) {
2052 if (user_tsc_khz > tsc_khz) {
2053 vcpu->arch.tsc_catchup = 1;
2054 vcpu->arch.tsc_always_catchup = 1;
2055 return 0;
2056 } else {
2057 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
2058 return -1;
2059 }
2060 }
2061
2062
2063 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
2064 user_tsc_khz, tsc_khz);
2065
2066 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
2067 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
2068 user_tsc_khz);
2069 return -1;
2070 }
2071
2072 vcpu->arch.tsc_scaling_ratio = ratio;
2073 return 0;
2074}
2075
2076static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
2077{
2078 u32 thresh_lo, thresh_hi;
2079 int use_scaling = 0;
2080
2081
2082 if (user_tsc_khz == 0) {
2083
2084 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
2085 return -1;
2086 }
2087
2088
2089 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
2090 &vcpu->arch.virtual_tsc_shift,
2091 &vcpu->arch.virtual_tsc_mult);
2092 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
2093
	/*
	 * Only resort to TSC scaling (or catch-up) when the requested rate
	 * falls outside the configured tolerance (tsc_tolerance_ppm) of the
	 * host TSC rate.
	 */
2100 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
2101 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
2102 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
2103 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
2104 use_scaling = 1;
2105 }
2106 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
2107}
2108
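/*
 * Compute what the guest's TSC should read at host time @kernel_ns, based on
 * the nanosecond timestamp and TSC value captured at the last sync point.
 */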
2109static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
2110{
2111 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
2112 vcpu->arch.virtual_tsc_mult,
2113 vcpu->arch.virtual_tsc_shift);
2114 tsc += vcpu->arch.this_tsc_write;
2115 return tsc;
2116}
2117
2118static inline int gtod_is_based_on_tsc(int mode)
2119{
2120 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
2121}
2122
2123static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
2124{
2125#ifdef CONFIG_X86_64
2126 bool vcpus_matched;
2127 struct kvm_arch *ka = &vcpu->kvm->arch;
2128 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2129
2130 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2131 atomic_read(&vcpu->kvm->online_vcpus));
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141 if (ka->use_master_clock ||
2142 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
2143 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2144
2145 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
2146 atomic_read(&vcpu->kvm->online_vcpus),
2147 ka->use_master_clock, gtod->clock.vclock_mode);
2148#endif
2149}
2150
/*
 * Multiply tsc by a fixed-point ratio: the upper (64 - N) bits of @ratio are
 * the integer part and the lower N bits the fractional part, where N is
 * kvm_tsc_scaling_ratio_frac_bits.
 */
2161static inline u64 __scale_tsc(u64 ratio, u64 tsc)
2162{
2163 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
2164}
2165
2166u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
2167{
2168 u64 _tsc = tsc;
2169 u64 ratio = vcpu->arch.tsc_scaling_ratio;
2170
2171 if (ratio != kvm_default_tsc_scaling_ratio)
2172 _tsc = __scale_tsc(ratio, tsc);
2173
2174 return _tsc;
2175}
2176EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2177
2178static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2179{
2180 u64 tsc;
2181
2182 tsc = kvm_scale_tsc(vcpu, rdtsc());
2183
2184 return target_tsc - tsc;
2185}
2186
2187u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2188{
2189 return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
2190}
2191EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2192
2193static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
2194{
2195 vcpu->arch.l1_tsc_offset = offset;
2196 vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
2197}
2198
2199static inline bool kvm_check_tsc_unstable(void)
2200{
2201#ifdef CONFIG_X86_64
	/*
	 * When running on Hyper-V with the TSC page clocksource, the TSC is
	 * typically marked unstable by the host kernel even though the TSC
	 * page itself is perfectly usable, so don't report it as unstable.
	 */
2206 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2207 return false;
2208#endif
2209 return check_tsc_unstable();
2210}
2211
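/*
 * Handle a write to the guest TSC (from the guest or from userspace) and
 * decide whether it is an attempt to synchronize with TSC writes already
 * performed on other vCPUs.  Matched writes share an offset and a
 * generation, which in turn feeds the masterclock matching logic.
 */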
2212static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
2213{
2214 struct kvm *kvm = vcpu->kvm;
2215 u64 offset, ns, elapsed;
2216 unsigned long flags;
2217 bool matched;
2218 bool already_matched;
2219 bool synchronizing = false;
2220
2221 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2222 offset = kvm_compute_tsc_offset(vcpu, data);
2223 ns = get_kvmclock_base_ns();
2224 elapsed = ns - kvm->arch.last_tsc_nsec;
2225
2226 if (vcpu->arch.virtual_tsc_khz) {
2227 if (data == 0) {
			/*
			 * A write of zero indicates vCPU initialization
			 * (e.g. CPU hotplug), so always treat it as an
			 * attempt to synchronize with the other vCPUs.
			 */
2233 synchronizing = true;
2234 } else {
2235 u64 tsc_exp = kvm->arch.last_tsc_write +
2236 nsec_to_cycles(vcpu, elapsed);
2237 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2238
			/*
			 * Otherwise only treat the write as a
			 * synchronization attempt if the value lies within
			 * one second's worth of cycles of what the TSC is
			 * expected to read now.
			 */
2243 synchronizing = data < tsc_exp + tsc_hz &&
2244 data + tsc_hz > tsc_exp;
2245 }
2246 }
2247
2248
2249
2250
2251
2252
2253
2254 if (synchronizing &&
2255 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2256 if (!kvm_check_tsc_unstable()) {
2257 offset = kvm->arch.cur_tsc_offset;
2258 } else {
2259 u64 delta = nsec_to_cycles(vcpu, elapsed);
2260 data += delta;
2261 offset = kvm_compute_tsc_offset(vcpu, data);
2262 }
2263 matched = true;
2264 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
2265 } else {
		/*
		 * Writes that are not synchronizing start a new TSC
		 * generation: record the nanosecond time, written value and
		 * offset for this generation so that later matched writes
		 * can reuse the exact offset (stable TSC) or the software
		 * computation in compute_guest_tsc() (unstable TSC).
		 */
2275 kvm->arch.cur_tsc_generation++;
2276 kvm->arch.cur_tsc_nsec = ns;
2277 kvm->arch.cur_tsc_write = data;
2278 kvm->arch.cur_tsc_offset = offset;
2279 matched = false;
2280 }
2281
2282
2283
2284
2285
2286 kvm->arch.last_tsc_nsec = ns;
2287 kvm->arch.last_tsc_write = data;
2288 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2289
2290 vcpu->arch.last_guest_tsc = data;
2291
2292
2293 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2294 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2295 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2296
2297 kvm_vcpu_write_tsc_offset(vcpu, offset);
2298 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2299
2300 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
2301 if (!matched) {
2302 kvm->arch.nr_vcpus_matched_tsc = 0;
2303 } else if (!already_matched) {
2304 kvm->arch.nr_vcpus_matched_tsc++;
2305 }
2306
2307 kvm_track_tsc_matching(vcpu);
2308 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
2309}
2310
2311static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2312 s64 adjustment)
2313{
2314 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2315 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2316}
2317
2318static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2319{
2320 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2321 WARN_ON(adjustment < 0);
2322 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
2323 adjust_tsc_offset_guest(vcpu, adjustment);
2324}
2325
2326#ifdef CONFIG_X86_64
2327
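/*
 * Read the host TSC for clock computations, clamping the result so it never
 * appears to go backwards relative to the timekeeper's last recorded cycle
 * value (cycle_last).
 */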
2328static u64 read_tsc(void)
2329{
2330 u64 ret = (u64)rdtsc_ordered();
2331 u64 last = pvclock_gtod_data.clock.cycle_last;
2332
2333 if (likely(ret >= last))
2334 return ret;
2335
	/*
	 * The TSC read appears to be behind the timekeeper's last recorded
	 * cycle value (e.g. readings taken on different CPUs); return
	 * cycle_last instead so the derived clock never moves backwards.
	 * The empty asm only keeps the compiler from turning the highly
	 * predictable branch above into a conditional move.
	 */
2344 asm volatile ("");
2345 return last;
2346}
2347
2348static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2349 int *mode)
2350{
2351 long v;
2352 u64 tsc_pg_val;
2353
2354 switch (clock->vclock_mode) {
2355 case VDSO_CLOCKMODE_HVCLOCK:
2356 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2357 tsc_timestamp);
2358 if (tsc_pg_val != U64_MAX) {
2359
2360 *mode = VDSO_CLOCKMODE_HVCLOCK;
2361 v = (tsc_pg_val - clock->cycle_last) &
2362 clock->mask;
2363 } else {
2364
2365 *mode = VDSO_CLOCKMODE_NONE;
2366 }
2367 break;
2368 case VDSO_CLOCKMODE_TSC:
2369 *mode = VDSO_CLOCKMODE_TSC;
2370 *tsc_timestamp = read_tsc();
2371 v = (*tsc_timestamp - clock->cycle_last) &
2372 clock->mask;
2373 break;
2374 default:
2375 *mode = VDSO_CLOCKMODE_NONE;
2376 }
2377
2378 if (*mode == VDSO_CLOCKMODE_NONE)
2379 *tsc_timestamp = v = 0;
2380
2381 return v * clock->mult;
2382}
2383
static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ns = gtod->raw_clock.base_cycles;
		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
		ns >>= gtod->raw_clock.shift;
		ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	*t = ns;

	return mode;
}
2402
static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->clock.base_cycles;
		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}
2423
2424
2425static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2426{
2427
2428 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2429 return false;
2430
2431 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2432 tsc_timestamp));
2433}
2434
2435
2436static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2437 u64 *tsc_timestamp)
2438{
2439
2440 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2441 return false;
2442
2443 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2444}
2445#endif
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
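/*
 * Refresh the per-VM masterclock snapshot.  The masterclock (a single host
 * time/TSC pair shared by all vCPUs' kvmclock) can only be used when the
 * host clocksource is TSC based, all vCPUs have matched TSC frequencies,
 * no backwards TSC has been observed and the boot vCPU does not use the old
 * kvmclock MSR; otherwise each vCPU falls back to per-vCPU readings.
 */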
2488static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2489{
2490#ifdef CONFIG_X86_64
2491 struct kvm_arch *ka = &kvm->arch;
2492 int vclock_mode;
2493 bool host_tsc_clocksource, vcpus_matched;
2494
2495 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2496 atomic_read(&kvm->online_vcpus));
2497
2498
2499
2500
2501
2502 host_tsc_clocksource = kvm_get_time_and_clockread(
2503 &ka->master_kernel_ns,
2504 &ka->master_cycle_now);
2505
2506 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2507 && !ka->backwards_tsc_observed
2508 && !ka->boot_vcpu_runs_old_kvmclock;
2509
2510 if (ka->use_master_clock)
2511 atomic_set(&kvm_guest_has_master_clock, 1);
2512
2513 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2514 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2515 vcpus_matched);
2516#endif
2517}
2518
2519void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2520{
2521 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2522}
2523
2524static void kvm_gen_update_masterclock(struct kvm *kvm)
2525{
2526#ifdef CONFIG_X86_64
2527 int i;
2528 struct kvm_vcpu *vcpu;
2529 struct kvm_arch *ka = &kvm->arch;
2530
2531 spin_lock(&ka->pvclock_gtod_sync_lock);
2532 kvm_make_mclock_inprogress_request(kvm);
2533
2534 pvclock_update_vm_gtod_copy(kvm);
2535
2536 kvm_for_each_vcpu(i, vcpu, kvm)
2537 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2538
2539
2540 kvm_for_each_vcpu(i, vcpu, kvm)
2541 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2542
2543 spin_unlock(&ka->pvclock_gtod_sync_lock);
2544#endif
2545}
2546
2547u64 get_kvmclock_ns(struct kvm *kvm)
2548{
2549 struct kvm_arch *ka = &kvm->arch;
2550 struct pvclock_vcpu_time_info hv_clock;
2551 u64 ret;
2552
2553 spin_lock(&ka->pvclock_gtod_sync_lock);
2554 if (!ka->use_master_clock) {
2555 spin_unlock(&ka->pvclock_gtod_sync_lock);
2556 return get_kvmclock_base_ns() + ka->kvmclock_offset;
2557 }
2558
2559 hv_clock.tsc_timestamp = ka->master_cycle_now;
2560 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2561 spin_unlock(&ka->pvclock_gtod_sync_lock);
2562
2563
2564 get_cpu();
2565
2566 if (__this_cpu_read(cpu_tsc_khz)) {
2567 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2568 &hv_clock.tsc_shift,
2569 &hv_clock.tsc_to_system_mul);
2570 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2571 } else
2572 ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
2573
2574 put_cpu();
2575
2576 return ret;
2577}
2578
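/*
 * Publish this vCPU's hv_clock to the guest's pvclock page using the pvclock
 * version protocol: the version field is odd while an update is in progress
 * and even once the contents are consistent, with write barriers ordering the
 * version updates against the payload.
 */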
2579static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
2580{
2581 struct kvm_vcpu_arch *vcpu = &v->arch;
2582 struct pvclock_vcpu_time_info guest_hv_clock;
2583
2584 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
2585 &guest_hv_clock, sizeof(guest_hv_clock))))
2586 return;
2587
	/*
	 * If the in-guest copy was left with an odd version (an interrupted
	 * update), bump it to even first; hv_clock.version is then set to
	 * guest version + 1, i.e. odd, for the duration of the update.
	 * Readers retry whenever they see an odd version or the version
	 * changes across their read.  The version field must remain the
	 * first member of the structure for this to work.
	 */
2602 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2603
2604 if (guest_hv_clock.version & 1)
2605 ++guest_hv_clock.version;
2606
2607 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2608 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2609 &vcpu->hv_clock,
2610 sizeof(vcpu->hv_clock.version));
2611
2612 smp_wmb();
2613
2614
2615 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2616
2617 if (vcpu->pvclock_set_guest_stopped_request) {
2618 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2619 vcpu->pvclock_set_guest_stopped_request = false;
2620 }
2621
2622 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2623
2624 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2625 &vcpu->hv_clock,
2626 sizeof(vcpu->hv_clock));
2627
2628 smp_wmb();
2629
2630 vcpu->hv_clock.version++;
2631 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2632 &vcpu->hv_clock,
2633 sizeof(vcpu->hv_clock.version));
2634}
2635
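/*
 * Recompute this vCPU's kvmclock parameters: pick a time/TSC pair (from the
 * masterclock snapshot when it is in use, otherwise locally), apply catch-up
 * adjustments if needed, rescale the multiplier when the underlying TSC
 * frequency changed, then push the result to the guest's pvclock page and,
 * for vCPU 0, to the Hyper-V TSC page.
 */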
2636static int kvm_guest_time_update(struct kvm_vcpu *v)
2637{
2638 unsigned long flags, tgt_tsc_khz;
2639 struct kvm_vcpu_arch *vcpu = &v->arch;
2640 struct kvm_arch *ka = &v->kvm->arch;
2641 s64 kernel_ns;
2642 u64 tsc_timestamp, host_tsc;
2643 u8 pvclock_flags;
2644 bool use_master_clock;
2645
2646 kernel_ns = 0;
2647 host_tsc = 0;
2648
2649
2650
2651
2652
2653 spin_lock(&ka->pvclock_gtod_sync_lock);
2654 use_master_clock = ka->use_master_clock;
2655 if (use_master_clock) {
2656 host_tsc = ka->master_cycle_now;
2657 kernel_ns = ka->master_kernel_ns;
2658 }
2659 spin_unlock(&ka->pvclock_gtod_sync_lock);
2660
2661
2662 local_irq_save(flags);
2663 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2664 if (unlikely(tgt_tsc_khz == 0)) {
2665 local_irq_restore(flags);
2666 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2667 return 1;
2668 }
2669 if (!use_master_clock) {
2670 host_tsc = rdtsc();
2671 kernel_ns = get_kvmclock_base_ns();
2672 }
2673
2674 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2675
	/*
	 * In catch-up mode the guest TSC may lag behind where elapsed time
	 * says it should be (compute_guest_tsc()).  If so, bump the TSC
	 * offset to catch it up and use the caught-up value as the pvclock
	 * timestamp, so the timestamp never runs ahead of the guest's own
	 * TSC reads.
	 */
2686 if (vcpu->tsc_catchup) {
2687 u64 tsc = compute_guest_tsc(v, kernel_ns);
2688 if (tsc > tsc_timestamp) {
2689 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2690 tsc_timestamp = tsc;
2691 }
2692 }
2693
2694 local_irq_restore(flags);
2695
2696
2697
2698 if (kvm_has_tsc_control)
2699 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
2700
2701 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2702 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2703 &vcpu->hv_clock.tsc_shift,
2704 &vcpu->hv_clock.tsc_to_system_mul);
2705 vcpu->hw_tsc_khz = tgt_tsc_khz;
2706 }
2707
2708 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2709 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2710 vcpu->last_guest_tsc = tsc_timestamp;
2711
2712
2713 pvclock_flags = 0;
2714 if (use_master_clock)
2715 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2716
2717 vcpu->hv_clock.flags = pvclock_flags;
2718
2719 if (vcpu->pv_time_enabled)
2720 kvm_setup_pvclock_page(v);
2721 if (v == kvm_get_vcpu(v->kvm, 0))
2722 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2723 return 0;
2724}
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
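/*
 * kvmclock updates that are isolated to a single vCPU (e.g. migration of a
 * vCPU to another physical CPU) would make the other vCPUs' clocks appear to
 * stand still relative to it, so after KVMCLOCK_UPDATE_DELAY the delayed work
 * below requests a clock update on every vCPU of the VM.
 */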
2740#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2741
2742static void kvmclock_update_fn(struct work_struct *work)
2743{
2744 int i;
2745 struct delayed_work *dwork = to_delayed_work(work);
2746 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2747 kvmclock_update_work);
2748 struct kvm *kvm = container_of(ka, struct kvm, arch);
2749 struct kvm_vcpu *vcpu;
2750
2751 kvm_for_each_vcpu(i, vcpu, kvm) {
2752 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2753 kvm_vcpu_kick(vcpu);
2754 }
2755}
2756
2757static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
2758{
2759 struct kvm *kvm = v->kvm;
2760
2761 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2762 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
2763 KVMCLOCK_UPDATE_DELAY);
2764}
2765
2766#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
2767
2768static void kvmclock_sync_fn(struct work_struct *work)
2769{
2770 struct delayed_work *dwork = to_delayed_work(work);
2771 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2772 kvmclock_sync_work);
2773 struct kvm *kvm = container_of(ka, struct kvm, arch);
2774
2775 if (!kvmclock_periodic_sync)
2776 return;
2777
2778 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2779 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2780 KVMCLOCK_SYNC_PERIOD);
2781}
2782
2783
2784
2785
2786static bool can_set_mci_status(struct kvm_vcpu *vcpu)
2787{
2788
2789 if (guest_cpuid_is_amd_or_hygon(vcpu))
2790 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
2791
2792 return false;
2793}
2794
2795static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2796{
2797 u64 mcg_cap = vcpu->arch.mcg_cap;
2798 unsigned bank_num = mcg_cap & 0xff;
2799 u32 msr = msr_info->index;
2800 u64 data = msr_info->data;
2801
2802 switch (msr) {
2803 case MSR_IA32_MCG_STATUS:
2804 vcpu->arch.mcg_status = data;
2805 break;
2806 case MSR_IA32_MCG_CTL:
2807 if (!(mcg_cap & MCG_CTL_P) &&
2808 (data || !msr_info->host_initiated))
2809 return 1;
2810 if (data != 0 && data != ~(u64)0)
2811 return 1;
2812 vcpu->arch.mcg_ctl = data;
2813 break;
2814 default:
2815 if (msr >= MSR_IA32_MC0_CTL &&
2816 msr < MSR_IA32_MCx_CTL(bank_num)) {
2817 u32 offset = array_index_nospec(
2818 msr - MSR_IA32_MC0_CTL,
2819 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
2820
			/*
			 * Only 0 or all-1s may be written to MCi_CTL.  Bit 10
			 * is tolerated because some guests clear it in bank 4
			 * to work around an old AMD K8 BIOS/GART erratum;
			 * accepting it avoids an unexpected #GP in the guest.
			 */
2826 if ((offset & 0x3) == 0 &&
2827 data != 0 && (data | (1 << 10)) != ~(u64)0)
2828 return -1;
2829
2830
2831 if (!msr_info->host_initiated &&
2832 (offset & 0x3) == 1 && data != 0) {
2833 if (!can_set_mci_status(vcpu))
2834 return -1;
2835 }
2836
2837 vcpu->arch.mce_banks[offset] = data;
2838 break;
2839 }
2840 return 1;
2841 }
2842 return 0;
2843}
2844
2845static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2846{
2847 struct kvm *kvm = vcpu->kvm;
2848 int lm = is_long_mode(vcpu);
2849 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2850 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2851 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2852 : kvm->arch.xen_hvm_config.blob_size_32;
2853 u32 page_num = data & ~PAGE_MASK;
2854 u64 page_addr = data & PAGE_MASK;
2855 u8 *page;
2856
2857 if (page_num >= blob_size)
2858 return 1;
2859
2860 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2861 if (IS_ERR(page))
2862 return PTR_ERR(page);
2863
2864 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
2865 kfree(page);
2866 return 1;
2867 }
2868 return 0;
2869}
2870
2871static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
2872{
2873 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
2874
2875 return (vcpu->arch.apf.msr_en_val & mask) == mask;
2876}
2877
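/*
 * MSR_KVM_ASYNC_PF_EN: the low bits carry the feature flags checked below
 * (enable, send-always, #PF-VM-exit delivery, interrupt-based delivery),
 * bits 4-5 are reserved and must be zero, and the remaining high bits hold
 * the GPA of the shared completion word (the low 6 bits are masked off).
 */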
2878static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2879{
2880 gpa_t gpa = data & ~0x3f;
2881
2882
2883 if (data & 0x30)
2884 return 1;
2885
2886 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_VMEXIT) &&
2887 (data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT))
2888 return 1;
2889
2890 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT) &&
2891 (data & KVM_ASYNC_PF_DELIVERY_AS_INT))
2892 return 1;
2893
2894 if (!lapic_in_kernel(vcpu))
2895 return data ? 1 : 0;
2896
2897 vcpu->arch.apf.msr_en_val = data;
2898
2899 if (!kvm_pv_async_pf_enabled(vcpu)) {
2900 kvm_clear_async_pf_completion_queue(vcpu);
2901 kvm_async_pf_hash_reset(vcpu);
2902 return 0;
2903 }
2904
2905 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2906 sizeof(u64)))
2907 return 1;
2908
2909 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2910 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2911
2912 kvm_async_pf_wakeup_all(vcpu);
2913
2914 return 0;
2915}
2916
2917static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
2918{
2919
2920 if (data >> 8)
2921 return 1;
2922
2923 if (!lapic_in_kernel(vcpu))
2924 return 1;
2925
2926 vcpu->arch.apf.msr_int_val = data;
2927
2928 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
2929
2930 return 0;
2931}
2932
2933static void kvmclock_reset(struct kvm_vcpu *vcpu)
2934{
2935 vcpu->arch.pv_time_enabled = false;
2936 vcpu->arch.time = 0;
2937}
2938
2939static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
2940{
2941 ++vcpu->stat.tlb_flush;
2942 kvm_x86_ops.tlb_flush_all(vcpu);
2943}
2944
2945static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
2946{
2947 ++vcpu->stat.tlb_flush;
2948 kvm_x86_ops.tlb_flush_guest(vcpu);
2949}
2950
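/*
 * Update the guest's steal-time accounting.  The kvm_steal_time structure
 * uses the same even/odd version protocol as pvclock, and the run_delay
 * accumulated by the scheduler since the last update is added to the steal
 * counter.  A set KVM_VCPU_FLUSH_TLB bit in 'preempted' means a TLB flush
 * was requested on this vCPU's behalf while it was preempted, and is
 * performed here instead of via an IPI.
 */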
2951static void record_steal_time(struct kvm_vcpu *vcpu)
2952{
2953 struct kvm_host_map map;
2954 struct kvm_steal_time *st;
2955
2956 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2957 return;
2958
2959
2960 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
2961 &map, &vcpu->arch.st.cache, false))
2962 return;
2963
2964 st = map.hva +
2965 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
2966
2967
2968
2969
2970
2971 if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
2972 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
2973 st->preempted & KVM_VCPU_FLUSH_TLB);
2974 if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
2975 kvm_vcpu_flush_tlb_guest(vcpu);
2976 }
2977
2978 vcpu->arch.st.preempted = 0;
2979
2980 if (st->version & 1)
2981 st->version += 1;
2982
2983 st->version += 1;
2984
2985 smp_wmb();
2986
2987 st->steal += current->sched_info.run_delay -
2988 vcpu->arch.st.last_steal;
2989 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2990
2991 smp_wmb();
2992
2993 st->version += 1;
2994
2995 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
2996}
2997
2998int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2999{
3000 bool pr = false;
3001 u32 msr = msr_info->index;
3002 u64 data = msr_info->data;
3003
3004 switch (msr) {
3005 case MSR_AMD64_NB_CFG:
3006 case MSR_IA32_UCODE_WRITE:
3007 case MSR_VM_HSAVE_PA:
3008 case MSR_AMD64_PATCH_LOADER:
3009 case MSR_AMD64_BU_CFG2:
3010 case MSR_AMD64_DC_CFG:
3011 case MSR_F15H_EX_CFG:
3012 break;
3013
3014 case MSR_IA32_UCODE_REV:
3015 if (msr_info->host_initiated)
3016 vcpu->arch.microcode_version = data;
3017 break;
3018 case MSR_IA32_ARCH_CAPABILITIES:
3019 if (!msr_info->host_initiated)
3020 return 1;
3021 vcpu->arch.arch_capabilities = data;
3022 break;
3023 case MSR_IA32_PERF_CAPABILITIES: {
3024 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
3025
3026 if (!msr_info->host_initiated)
3027 return 1;
3028 if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
3029 return 1;
3030 if (data & ~msr_ent.data)
3031 return 1;
3032
3033 vcpu->arch.perf_capabilities = data;
3034
3035 return 0;
3036 }
3037 case MSR_EFER:
3038 return set_efer(vcpu, msr_info);
3039 case MSR_K7_HWCR:
3040 data &= ~(u64)0x40;
3041 data &= ~(u64)0x100;
3042 data &= ~(u64)0x8;
3043
3044
3045 if (data == BIT_ULL(18)) {
3046 vcpu->arch.msr_hwcr = data;
3047 } else if (data != 0) {
3048 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
3049 data);
3050 return 1;
3051 }
3052 break;
3053 case MSR_FAM10H_MMIO_CONF_BASE:
3054 if (data != 0) {
3055 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
3056 "0x%llx\n", data);
3057 return 1;
3058 }
3059 break;
3060 case MSR_IA32_DEBUGCTLMSR:
3061 if (!data) {
3062
3063 break;
3064 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
3065
3066
3067 return 1;
3068 } else if (report_ignored_msrs)
3069 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
3070 __func__, data);
3071 break;
3072 case 0x200 ... 0x2ff:
3073 return kvm_mtrr_set_msr(vcpu, msr, data);
3074 case MSR_IA32_APICBASE:
3075 return kvm_set_apic_base(vcpu, msr_info);
3076 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3077 return kvm_x2apic_msr_write(vcpu, msr, data);
3078 case MSR_IA32_TSCDEADLINE:
3079 kvm_set_lapic_tscdeadline_msr(vcpu, data);
3080 break;
3081 case MSR_IA32_TSC_ADJUST:
3082 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
3083 if (!msr_info->host_initiated) {
3084 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
3085 adjust_tsc_offset_guest(vcpu, adj);
3086 }
3087 vcpu->arch.ia32_tsc_adjust_msr = data;
3088 }
3089 break;
3090 case MSR_IA32_MISC_ENABLE:
3091 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
3092 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
3093 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
3094 return 1;
3095 vcpu->arch.ia32_misc_enable_msr = data;
3096 kvm_update_cpuid_runtime(vcpu);
3097 } else {
3098 vcpu->arch.ia32_misc_enable_msr = data;
3099 }
3100 break;
3101 case MSR_IA32_SMBASE:
3102 if (!msr_info->host_initiated)
3103 return 1;
3104 vcpu->arch.smbase = data;
3105 break;
3106 case MSR_IA32_POWER_CTL:
3107 vcpu->arch.msr_ia32_power_ctl = data;
3108 break;
3109 case MSR_IA32_TSC:
3110 if (msr_info->host_initiated) {
3111 kvm_synchronize_tsc(vcpu, data);
3112 } else {
3113 u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
3114 adjust_tsc_offset_guest(vcpu, adj);
3115 vcpu->arch.ia32_tsc_adjust_msr += adj;
3116 }
3117 break;
3118 case MSR_IA32_XSS:
3119 if (!msr_info->host_initiated &&
3120 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3121 return 1;
3122
3123
3124
3125
3126
3127 if (data & ~supported_xss)
3128 return 1;
3129 vcpu->arch.ia32_xss = data;
3130 break;
3131 case MSR_SMI_COUNT:
3132 if (!msr_info->host_initiated)
3133 return 1;
3134 vcpu->arch.smi_count = data;
3135 break;
3136 case MSR_KVM_WALL_CLOCK_NEW:
3137 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3138 return 1;
3139
3140 kvm_write_wall_clock(vcpu->kvm, data);
3141 break;
3142 case MSR_KVM_WALL_CLOCK:
3143 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3144 return 1;
3145
3146 kvm_write_wall_clock(vcpu->kvm, data);
3147 break;
3148 case MSR_KVM_SYSTEM_TIME_NEW:
3149 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3150 return 1;
3151
3152 kvm_write_system_time(vcpu, data, false, msr_info->host_initiated);
3153 break;
3154 case MSR_KVM_SYSTEM_TIME:
3155 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3156 return 1;
3157
3158 kvm_write_system_time(vcpu, data, true, msr_info->host_initiated);
3159 break;
3160 case MSR_KVM_ASYNC_PF_EN:
3161 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3162 return 1;
3163
3164 if (kvm_pv_enable_async_pf(vcpu, data))
3165 return 1;
3166 break;
3167 case MSR_KVM_ASYNC_PF_INT:
3168 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3169 return 1;
3170
3171 if (kvm_pv_enable_async_pf_int(vcpu, data))
3172 return 1;
3173 break;
3174 case MSR_KVM_ASYNC_PF_ACK:
3175 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3176 return 1;
3177 if (data & 0x1) {
3178 vcpu->arch.apf.pageready_pending = false;
3179 kvm_check_async_pf_completion(vcpu);
3180 }
3181 break;
3182 case MSR_KVM_STEAL_TIME:
3183 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3184 return 1;
3185
3186 if (unlikely(!sched_info_on()))
3187 return 1;
3188
3189 if (data & KVM_STEAL_RESERVED_MASK)
3190 return 1;
3191
3192 vcpu->arch.st.msr_val = data;
3193
3194 if (!(data & KVM_MSR_ENABLED))
3195 break;
3196
3197 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3198
3199 break;
3200 case MSR_KVM_PV_EOI_EN:
3201 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3202 return 1;
3203
3204 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
3205 return 1;
3206 break;
3207
3208 case MSR_KVM_POLL_CONTROL:
3209 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3210 return 1;
3211
3212
3213 if (data & (-1ULL << 1))
3214 return 1;
3215
3216 vcpu->arch.msr_kvm_poll_control = data;
3217 break;
3218
3219 case MSR_IA32_MCG_CTL:
3220 case MSR_IA32_MCG_STATUS:
3221 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3222 return set_msr_mce(vcpu, msr_info);
3223
3224 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3225 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3226 pr = true;
3227 fallthrough;
3228 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3229 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3230 if (kvm_pmu_is_valid_msr(vcpu, msr))
3231 return kvm_pmu_set_msr(vcpu, msr_info);
3232
3233 if (pr || data != 0)
3234 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3235 "0x%x data 0x%llx\n", msr, data);
3236 break;
3237 case MSR_K7_CLK_CTL:
3238
3239
3240
3241
3242
3243
3244
3245
3246 break;
3247 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3248 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3249 case HV_X64_MSR_SYNDBG_OPTIONS:
3250 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3251 case HV_X64_MSR_CRASH_CTL:
3252 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3253 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3254 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3255 case HV_X64_MSR_TSC_EMULATION_STATUS:
3256 return kvm_hv_set_msr_common(vcpu, msr, data,
3257 msr_info->host_initiated);
3258 case MSR_IA32_BBL_CR_CTL3:
3259
3260
3261
3262 if (report_ignored_msrs)
3263 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3264 msr, data);
3265 break;
3266 case MSR_AMD64_OSVW_ID_LENGTH:
3267 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3268 return 1;
3269 vcpu->arch.osvw.length = data;
3270 break;
3271 case MSR_AMD64_OSVW_STATUS:
3272 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3273 return 1;
3274 vcpu->arch.osvw.status = data;
3275 break;
3276 case MSR_PLATFORM_INFO:
3277 if (!msr_info->host_initiated ||
3278 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3279 cpuid_fault_enabled(vcpu)))
3280 return 1;
3281 vcpu->arch.msr_platform_info = data;
3282 break;
3283 case MSR_MISC_FEATURES_ENABLES:
3284 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3285 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3286 !supports_cpuid_fault(vcpu)))
3287 return 1;
3288 vcpu->arch.msr_misc_features_enables = data;
3289 break;
3290 default:
3291 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
3292 return xen_hvm_config(vcpu, data);
3293 if (kvm_pmu_is_valid_msr(vcpu, msr))
3294 return kvm_pmu_set_msr(vcpu, msr_info);
3295 return KVM_MSR_RET_INVALID;
3296 }
3297 return 0;
3298}
3299EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3300
3301static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3302{
3303 u64 data;
3304 u64 mcg_cap = vcpu->arch.mcg_cap;
3305 unsigned bank_num = mcg_cap & 0xff;
3306
3307 switch (msr) {
3308 case MSR_IA32_P5_MC_ADDR:
3309 case MSR_IA32_P5_MC_TYPE:
3310 data = 0;
3311 break;
3312 case MSR_IA32_MCG_CAP:
3313 data = vcpu->arch.mcg_cap;
3314 break;
3315 case MSR_IA32_MCG_CTL:
3316 if (!(mcg_cap & MCG_CTL_P) && !host)
3317 return 1;
3318 data = vcpu->arch.mcg_ctl;
3319 break;
3320 case MSR_IA32_MCG_STATUS:
3321 data = vcpu->arch.mcg_status;
3322 break;
3323 default:
3324 if (msr >= MSR_IA32_MC0_CTL &&
3325 msr < MSR_IA32_MCx_CTL(bank_num)) {
3326 u32 offset = array_index_nospec(
3327 msr - MSR_IA32_MC0_CTL,
3328 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3329
3330 data = vcpu->arch.mce_banks[offset];
3331 break;
3332 }
3333 return 1;
3334 }
3335 *pdata = data;
3336 return 0;
3337}
3338
3339int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3340{
3341 switch (msr_info->index) {
3342 case MSR_IA32_PLATFORM_ID:
3343 case MSR_IA32_EBL_CR_POWERON:
3344 case MSR_IA32_DEBUGCTLMSR:
3345 case MSR_IA32_LASTBRANCHFROMIP:
3346 case MSR_IA32_LASTBRANCHTOIP:
3347 case MSR_IA32_LASTINTFROMIP:
3348 case MSR_IA32_LASTINTTOIP:
3349 case MSR_K8_SYSCFG:
3350 case MSR_K8_TSEG_ADDR:
3351 case MSR_K8_TSEG_MASK:
3352 case MSR_VM_HSAVE_PA:
3353 case MSR_K8_INT_PENDING_MSG:
3354 case MSR_AMD64_NB_CFG:
3355 case MSR_FAM10H_MMIO_CONF_BASE:
3356 case MSR_AMD64_BU_CFG2:
3357 case MSR_IA32_PERF_CTL:
3358 case MSR_AMD64_DC_CFG:
3359 case MSR_F15H_EX_CFG:
3360
3361
3362
3363
3364
3365
3366 case MSR_RAPL_POWER_UNIT:
3367 case MSR_PP0_ENERGY_STATUS:
3368 case MSR_PP1_ENERGY_STATUS:
3369 case MSR_PKG_ENERGY_STATUS:
3370 case MSR_DRAM_ENERGY_STATUS:
3371 msr_info->data = 0;
3372 break;
3373 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3374 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3375 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3376 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3377 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3378 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3379 return kvm_pmu_get_msr(vcpu, msr_info);
3380 msr_info->data = 0;
3381 break;
3382 case MSR_IA32_UCODE_REV:
3383 msr_info->data = vcpu->arch.microcode_version;
3384 break;
3385 case MSR_IA32_ARCH_CAPABILITIES:
3386 if (!msr_info->host_initiated &&
3387 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3388 return 1;
3389 msr_info->data = vcpu->arch.arch_capabilities;
3390 break;
3391 case MSR_IA32_PERF_CAPABILITIES:
3392 if (!msr_info->host_initiated &&
3393 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3394 return 1;
3395 msr_info->data = vcpu->arch.perf_capabilities;
3396 break;
3397 case MSR_IA32_POWER_CTL:
3398 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3399 break;
3400 case MSR_IA32_TSC: {
		/*
		 * Host-initiated reads return the L1 TSC (host TSC scaled
		 * plus the L1 offset) so migration sees a stable value,
		 * while guest reads use the current tsc_offset, which also
		 * reflects any L2 offset while a nested guest is running.
		 */
3410 u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
3411 vcpu->arch.tsc_offset;
3412
3413 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
3414 break;
3415 }
3416 case MSR_MTRRcap:
3417 case 0x200 ... 0x2ff:
3418 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3419 case 0xcd:
3420 msr_info->data = 3;
3421 break;
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433 case MSR_EBC_FREQUENCY_ID:
3434 msr_info->data = 1 << 24;
3435 break;
3436 case MSR_IA32_APICBASE:
3437 msr_info->data = kvm_get_apic_base(vcpu);
3438 break;
3439 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3440 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3441 case MSR_IA32_TSCDEADLINE:
3442 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3443 break;
3444 case MSR_IA32_TSC_ADJUST:
3445 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3446 break;
3447 case MSR_IA32_MISC_ENABLE:
3448 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3449 break;
3450 case MSR_IA32_SMBASE:
3451 if (!msr_info->host_initiated)
3452 return 1;
3453 msr_info->data = vcpu->arch.smbase;
3454 break;
3455 case MSR_SMI_COUNT:
3456 msr_info->data = vcpu->arch.smi_count;
3457 break;
3458 case MSR_IA32_PERF_STATUS:
3459
3460 msr_info->data = 1000ULL;
3461
3462 msr_info->data |= (((uint64_t)4ULL) << 40);
3463 break;
3464 case MSR_EFER:
3465 msr_info->data = vcpu->arch.efer;
3466 break;
3467 case MSR_KVM_WALL_CLOCK:
3468 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3469 return 1;
3470
3471 msr_info->data = vcpu->kvm->arch.wall_clock;
3472 break;
3473 case MSR_KVM_WALL_CLOCK_NEW:
3474 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3475 return 1;
3476
3477 msr_info->data = vcpu->kvm->arch.wall_clock;
3478 break;
3479 case MSR_KVM_SYSTEM_TIME:
3480 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
3481 return 1;
3482
3483 msr_info->data = vcpu->arch.time;
3484 break;
3485 case MSR_KVM_SYSTEM_TIME_NEW:
3486 if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
3487 return 1;
3488
3489 msr_info->data = vcpu->arch.time;
3490 break;
3491 case MSR_KVM_ASYNC_PF_EN:
3492 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3493 return 1;
3494
3495 msr_info->data = vcpu->arch.apf.msr_en_val;
3496 break;
3497 case MSR_KVM_ASYNC_PF_INT:
3498 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
3499 return 1;
3500
3501 msr_info->data = vcpu->arch.apf.msr_int_val;
3502 break;
3503 case MSR_KVM_ASYNC_PF_ACK:
3504 if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
3505 return 1;
3506
3507 msr_info->data = 0;
3508 break;
3509 case MSR_KVM_STEAL_TIME:
3510 if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME))
3511 return 1;
3512
3513 msr_info->data = vcpu->arch.st.msr_val;
3514 break;
3515 case MSR_KVM_PV_EOI_EN:
3516 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
3517 return 1;
3518
3519 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3520 break;
3521 case MSR_KVM_POLL_CONTROL:
3522 if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL))
3523 return 1;
3524
3525 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3526 break;
3527 case MSR_IA32_P5_MC_ADDR:
3528 case MSR_IA32_P5_MC_TYPE:
3529 case MSR_IA32_MCG_CAP:
3530 case MSR_IA32_MCG_CTL:
3531 case MSR_IA32_MCG_STATUS:
3532 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3533 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3534 msr_info->host_initiated);
3535 case MSR_IA32_XSS:
3536 if (!msr_info->host_initiated &&
3537 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3538 return 1;
3539 msr_info->data = vcpu->arch.ia32_xss;
3540 break;
3541 case MSR_K7_CLK_CTL:
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551 msr_info->data = 0x20000000;
3552 break;
3553 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3554 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3555 case HV_X64_MSR_SYNDBG_OPTIONS:
3556 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3557 case HV_X64_MSR_CRASH_CTL:
3558 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3559 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3560 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3561 case HV_X64_MSR_TSC_EMULATION_STATUS:
3562 return kvm_hv_get_msr_common(vcpu,
3563 msr_info->index, &msr_info->data,
3564 msr_info->host_initiated);
3565 case MSR_IA32_BBL_CR_CTL3:
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576 msr_info->data = 0xbe702111;
3577 break;
3578 case MSR_AMD64_OSVW_ID_LENGTH:
3579 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3580 return 1;
3581 msr_info->data = vcpu->arch.osvw.length;
3582 break;
3583 case MSR_AMD64_OSVW_STATUS:
3584 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3585 return 1;
3586 msr_info->data = vcpu->arch.osvw.status;
3587 break;
3588 case MSR_PLATFORM_INFO:
3589 if (!msr_info->host_initiated &&
3590 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3591 return 1;
3592 msr_info->data = vcpu->arch.msr_platform_info;
3593 break;
3594 case MSR_MISC_FEATURES_ENABLES:
3595 msr_info->data = vcpu->arch.msr_misc_features_enables;
3596 break;
3597 case MSR_K7_HWCR:
3598 msr_info->data = vcpu->arch.msr_hwcr;
3599 break;
3600 default:
3601 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3602 return kvm_pmu_get_msr(vcpu, msr_info);
3603 return KVM_MSR_RET_INVALID;
3604 }
3605 return 0;
3606}
3607EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3608
/*
 * Read or write a bunch of MSRs.  All parameters are kernel addresses;
 * returns the number of MSRs processed before the first failure.
 */
3614static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
3615 struct kvm_msr_entry *entries,
3616 int (*do_msr)(struct kvm_vcpu *vcpu,
3617 unsigned index, u64 *data))
3618{
3619 int i;
3620
3621 for (i = 0; i < msrs->nmsrs; ++i)
3622 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3623 break;
3624
3625 return i;
3626}
3627
/*
 * Read or write a bunch of MSRs on behalf of userspace: @user_msrs is a
 * userspace pointer, and the entries are copied back out only when
 * @writeback is set (i.e. for reads).
 */
3633static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
3634 int (*do_msr)(struct kvm_vcpu *vcpu,
3635 unsigned index, u64 *data),
3636 int writeback)
3637{
3638 struct kvm_msrs msrs;
3639 struct kvm_msr_entry *entries;
3640 int r, n;
3641 unsigned size;
3642
3643 r = -EFAULT;
3644 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3645 goto out;
3646
3647 r = -E2BIG;
3648 if (msrs.nmsrs >= MAX_IO_MSRS)
3649 goto out;
3650
3651 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3652 entries = memdup_user(user_msrs->entries, size);
3653 if (IS_ERR(entries)) {
3654 r = PTR_ERR(entries);
3655 goto out;
3656 }
3657
3658 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3659 if (r < 0)
3660 goto out_free;
3661
3662 r = -EFAULT;
3663 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3664 goto out_free;
3665
3666 r = n;
3667
3668out_free:
3669 kfree(entries);
3670out:
3671 return r;
3672}
3673
3674static inline bool kvm_can_mwait_in_guest(void)
3675{
3676 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3677 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3678 boot_cpu_has(X86_FEATURE_ARAT);
3679}
3680
3681int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3682{
3683 int r = 0;
3684
3685 switch (ext) {
3686 case KVM_CAP_IRQCHIP:
3687 case KVM_CAP_HLT:
3688 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3689 case KVM_CAP_SET_TSS_ADDR:
3690 case KVM_CAP_EXT_CPUID:
3691 case KVM_CAP_EXT_EMUL_CPUID:
3692 case KVM_CAP_CLOCKSOURCE:
3693 case KVM_CAP_PIT:
3694 case KVM_CAP_NOP_IO_DELAY:
3695 case KVM_CAP_MP_STATE:
3696 case KVM_CAP_SYNC_MMU:
3697 case KVM_CAP_USER_NMI:
3698 case KVM_CAP_REINJECT_CONTROL:
3699 case KVM_CAP_IRQ_INJECT_STATUS:
3700 case KVM_CAP_IOEVENTFD:
3701 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3702 case KVM_CAP_PIT2:
3703 case KVM_CAP_PIT_STATE2:
3704 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3705 case KVM_CAP_XEN_HVM:
3706 case KVM_CAP_VCPU_EVENTS:
3707 case KVM_CAP_HYPERV:
3708 case KVM_CAP_HYPERV_VAPIC:
3709 case KVM_CAP_HYPERV_SPIN:
3710 case KVM_CAP_HYPERV_SYNIC:
3711 case KVM_CAP_HYPERV_SYNIC2:
3712 case KVM_CAP_HYPERV_VP_INDEX:
3713 case KVM_CAP_HYPERV_EVENTFD:
3714 case KVM_CAP_HYPERV_TLBFLUSH:
3715 case KVM_CAP_HYPERV_SEND_IPI:
3716 case KVM_CAP_HYPERV_CPUID:
3717 case KVM_CAP_PCI_SEGMENT:
3718 case KVM_CAP_DEBUGREGS:
3719 case KVM_CAP_X86_ROBUST_SINGLESTEP:
3720 case KVM_CAP_XSAVE:
3721 case KVM_CAP_ASYNC_PF:
3722 case KVM_CAP_ASYNC_PF_INT:
3723 case KVM_CAP_GET_TSC_KHZ:
3724 case KVM_CAP_KVMCLOCK_CTRL:
3725 case KVM_CAP_READONLY_MEM:
3726 case KVM_CAP_HYPERV_TIME:
3727 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
3728 case KVM_CAP_TSC_DEADLINE_TIMER:
3729 case KVM_CAP_DISABLE_QUIRKS:
3730 case KVM_CAP_SET_BOOT_CPU_ID:
3731 case KVM_CAP_SPLIT_IRQCHIP:
3732 case KVM_CAP_IMMEDIATE_EXIT:
3733 case KVM_CAP_PMU_EVENT_FILTER:
3734 case KVM_CAP_GET_MSR_FEATURES:
3735 case KVM_CAP_MSR_PLATFORM_INFO:
3736 case KVM_CAP_EXCEPTION_PAYLOAD:
3737 case KVM_CAP_SET_GUEST_DEBUG:
3738 case KVM_CAP_LAST_CPU:
3739 case KVM_CAP_X86_USER_SPACE_MSR:
3740 case KVM_CAP_X86_MSR_FILTER:
3741 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
3742 r = 1;
3743 break;
3744 case KVM_CAP_SYNC_REGS:
3745 r = KVM_SYNC_X86_VALID_FIELDS;
3746 break;
3747 case KVM_CAP_ADJUST_CLOCK:
3748 r = KVM_CLOCK_TSC_STABLE;
3749 break;
3750 case KVM_CAP_X86_DISABLE_EXITS:
3751 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
3752 KVM_X86_DISABLE_EXITS_CSTATE;
3753 if(kvm_can_mwait_in_guest())
3754 r |= KVM_X86_DISABLE_EXITS_MWAIT;
3755 break;
3756 case KVM_CAP_X86_SMM:
3757
3758
3759
3760
3761
3762
3763
3764
3765 r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
3766 break;
3767 case KVM_CAP_VAPIC:
3768 r = !kvm_x86_ops.cpu_has_accelerated_tpr();
3769 break;
3770 case KVM_CAP_NR_VCPUS:
3771 r = KVM_SOFT_MAX_VCPUS;
3772 break;
3773 case KVM_CAP_MAX_VCPUS:
3774 r = KVM_MAX_VCPUS;
3775 break;
3776 case KVM_CAP_MAX_VCPU_ID:
3777 r = KVM_MAX_VCPU_ID;
3778 break;
3779 case KVM_CAP_PV_MMU:
3780 r = 0;
3781 break;
3782 case KVM_CAP_MCE:
3783 r = KVM_MAX_MCE_BANKS;
3784 break;
3785 case KVM_CAP_XCRS:
3786 r = boot_cpu_has(X86_FEATURE_XSAVE);
3787 break;
3788 case KVM_CAP_TSC_CONTROL:
3789 r = kvm_has_tsc_control;
3790 break;
3791 case KVM_CAP_X2APIC_API:
3792 r = KVM_X2APIC_API_VALID_FLAGS;
3793 break;
3794 case KVM_CAP_NESTED_STATE:
3795 r = kvm_x86_ops.nested_ops->get_state ?
3796 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
3797 break;
3798 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
3799 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
3800 break;
3801 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
3802 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
3803 break;
3804 case KVM_CAP_SMALLER_MAXPHYADDR:
3805 r = (int) allow_smaller_maxphyaddr;
3806 break;
3807 case KVM_CAP_STEAL_TIME:
3808 r = sched_info_on();
3809 break;
3810 default:
3811 break;
3812 }
3813 return r;
3814
3815}
3816
3817long kvm_arch_dev_ioctl(struct file *filp,
3818 unsigned int ioctl, unsigned long arg)
3819{
3820 void __user *argp = (void __user *)arg;
3821 long r;
3822
3823 switch (ioctl) {
3824 case KVM_GET_MSR_INDEX_LIST: {
3825 struct kvm_msr_list __user *user_msr_list = argp;
3826 struct kvm_msr_list msr_list;
3827 unsigned n;
3828
3829 r = -EFAULT;
3830 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3831 goto out;
3832 n = msr_list.nmsrs;
3833 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
3834 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3835 goto out;
3836 r = -E2BIG;
3837 if (n < msr_list.nmsrs)
3838 goto out;
3839 r = -EFAULT;
3840 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3841 num_msrs_to_save * sizeof(u32)))
3842 goto out;
3843 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
3844 &emulated_msrs,
3845 num_emulated_msrs * sizeof(u32)))
3846 goto out;
3847 r = 0;
3848 break;
3849 }
3850 case KVM_GET_SUPPORTED_CPUID:
3851 case KVM_GET_EMULATED_CPUID: {
3852 struct kvm_cpuid2 __user *cpuid_arg = argp;
3853 struct kvm_cpuid2 cpuid;
3854
3855 r = -EFAULT;
3856 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3857 goto out;
3858
3859 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
3860 ioctl);
3861 if (r)
3862 goto out;
3863
3864 r = -EFAULT;
3865 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3866 goto out;
3867 r = 0;
3868 break;
3869 }
3870 case KVM_X86_GET_MCE_CAP_SUPPORTED:
3871 r = -EFAULT;
3872 if (copy_to_user(argp, &kvm_mce_cap_supported,
3873 sizeof(kvm_mce_cap_supported)))
3874 goto out;
3875 r = 0;
3876 break;
3877 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
3878 struct kvm_msr_list __user *user_msr_list = argp;
3879 struct kvm_msr_list msr_list;
3880 unsigned int n;
3881
3882 r = -EFAULT;
3883 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3884 goto out;
3885 n = msr_list.nmsrs;
3886 msr_list.nmsrs = num_msr_based_features;
3887 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3888 goto out;
3889 r = -E2BIG;
3890 if (n < msr_list.nmsrs)
3891 goto out;
3892 r = -EFAULT;
3893 if (copy_to_user(user_msr_list->indices, &msr_based_features,
3894 num_msr_based_features * sizeof(u32)))
3895 goto out;
3896 r = 0;
3897 break;
3898 }
3899 case KVM_GET_MSRS:
3900 r = msr_io(NULL, argp, do_get_msr_feature, 1);
3901 break;
3902 default:
3903 r = -EINVAL;
3904 break;
3905 }
3906out:
3907 return r;
3908}
3909
3910static void wbinvd_ipi(void *garbage)
3911{
3912 wbinvd();
3913}
3914
3915static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
3916{
3917 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
3918}
3919
3920void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3921{
3922
3923 if (need_emulate_wbinvd(vcpu)) {
3924 if (kvm_x86_ops.has_wbinvd_exit())
3925 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
3926 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
3927 smp_call_function_single(vcpu->cpu,
3928 wbinvd_ipi, NULL, 1);
3929 }
3930
3931 kvm_x86_ops.vcpu_load(vcpu, cpu);
3932
3933
3934 vcpu->arch.host_pkru = read_pkru();
3935
3936
3937 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
3938 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
3939 vcpu->arch.tsc_offset_adjustment = 0;
3940 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3941 }
3942
3943 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
3944 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
3945 rdtsc() - vcpu->arch.last_host_tsc;
3946 if (tsc_delta < 0)
3947 mark_tsc_unstable("KVM discovered backwards TSC");
3948
3949 if (kvm_check_tsc_unstable()) {
3950 u64 offset = kvm_compute_tsc_offset(vcpu,
3951 vcpu->arch.last_guest_tsc);
3952 kvm_vcpu_write_tsc_offset(vcpu, offset);
3953 vcpu->arch.tsc_catchup = 1;
3954 }
3955
3956 if (kvm_lapic_hv_timer_in_use(vcpu))
3957 kvm_lapic_restart_hv_timer(vcpu);
3958
3959
3960
3961
3962
3963 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
3964 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
3965 if (vcpu->cpu != cpu)
3966 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
3967 vcpu->cpu = cpu;
3968 }
3969
3970 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3971}
3972
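/*
 * Mark the vCPU as preempted in its steal-time record so the guest (e.g.
 * vcpu_is_preempted() in paravirt spinlock code) can tell this vCPU is not
 * currently running.  Called on vcpu_put, in a context where sleeping page
 * faults are not allowed, hence the atomic mapping.
 */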
3973static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
3974{
3975 struct kvm_host_map map;
3976 struct kvm_steal_time *st;
3977
3978 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3979 return;
3980
3981 if (vcpu->arch.st.preempted)
3982 return;
3983
3984 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
3985 &vcpu->arch.st.cache, true))
3986 return;
3987
3988 st = map.hva +
3989 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
3990
3991 st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
3992
3993 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
3994}
3995
3996void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3997{
3998 int idx;
3999
4000 if (vcpu->preempted)
4001 vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
4002
	/*
	 * Disable page faults: this path can be reached from a preempt
	 * notifier (atomic context), and the steal-time update below writes
	 * guest memory, which must not fault or sleep here.  If the write
	 * cannot complete it is simply skipped.
	 */
4011 pagefault_disable();
4012
	/*
	 * Take the SRCU read lock: resolving the steal-time gfn below goes
	 * through kvm_memslots(), which requires it.
	 */
4016 idx = srcu_read_lock(&vcpu->kvm->srcu);
4017 kvm_steal_time_set_preempted(vcpu);
4018 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4019 pagefault_enable();
4020 kvm_x86_ops.vcpu_put(vcpu);
4021 vcpu->arch.last_host_tsc = rdtsc();
4022
4023
4024
4025
4026
4027 set_debugreg(0, 6);
4028}
4029
4030static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
4031 struct kvm_lapic_state *s)
4032{
4033 if (vcpu->arch.apicv_active)
4034 kvm_x86_ops.sync_pir_to_irr(vcpu);
4035
4036 return kvm_apic_get_state(vcpu, s);
4037}
4038
4039static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
4040 struct kvm_lapic_state *s)
4041{
4042 int r;
4043
4044 r = kvm_apic_set_state(vcpu, s);
4045 if (r)
4046 return r;
4047 update_cr8_intercept(vcpu);
4048
4049 return 0;
4050}
4051
4052static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
4053{
4054
4055
4056
4057
4058
4059
4060 if (kvm_cpu_has_extint(vcpu))
4061 return false;
4062
4063
4064 return (!lapic_in_kernel(vcpu) ||
4065 kvm_apic_accept_pic_intr(vcpu));
4066}
4067
4068static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
4069{
4070 return kvm_arch_interrupt_allowed(vcpu) &&
4071 kvm_cpu_accept_dm_intr(vcpu);
4072}
4073
4074static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
4075 struct kvm_interrupt *irq)
4076{
4077 if (irq->irq >= KVM_NR_INTERRUPTS)
4078 return -EINVAL;
4079
4080 if (!irqchip_in_kernel(vcpu->kvm)) {
4081 kvm_queue_interrupt(vcpu, irq->irq, false);
4082 kvm_make_request(KVM_REQ_EVENT, vcpu);
4083 return 0;
4084 }
4085
4086
4087
4088
4089
4090 if (pic_in_kernel(vcpu->kvm))
4091 return -ENXIO;
4092
4093 if (vcpu->arch.pending_external_vector != -1)
4094 return -EEXIST;
4095
4096 vcpu->arch.pending_external_vector = irq->irq;
4097 kvm_make_request(KVM_REQ_EVENT, vcpu);
4098 return 0;
4099}
4100
4101static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
4102{
4103 kvm_inject_nmi(vcpu);
4104
4105 return 0;
4106}
4107
4108static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
4109{
4110 kvm_make_request(KVM_REQ_SMI, vcpu);
4111
4112 return 0;
4113}
4114
4115static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
4116 struct kvm_tpr_access_ctl *tac)
4117{
4118 if (tac->flags)
4119 return -EINVAL;
4120 vcpu->arch.tpr_access_reporting = !!tac->enabled;
4121 return 0;
4122}
4123
4124static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
4125 u64 mcg_cap)
4126{
4127 int r;
4128 unsigned bank_num = mcg_cap & 0xff, bank;
4129
4130 r = -EINVAL;
4131 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
4132 goto out;
4133 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
4134 goto out;
4135 r = 0;
4136 vcpu->arch.mcg_cap = mcg_cap;
4137
4138 if (mcg_cap & MCG_CTL_P)
4139 vcpu->arch.mcg_ctl = ~(u64)0;
4140
4141 for (bank = 0; bank < bank_num; bank++)
4142 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
4143
4144 kvm_x86_ops.setup_mce(vcpu);
4145out:
4146 return r;
4147}
4148
4149static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
4150 struct kvm_x86_mce *mce)
4151{
4152 u64 mcg_cap = vcpu->arch.mcg_cap;
4153 unsigned bank_num = mcg_cap & 0xff;
4154 u64 *banks = vcpu->arch.mce_banks;
4155
4156 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
4157 return -EINVAL;
4158
4159
4160
4161
4162 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
4163 vcpu->arch.mcg_ctl != ~(u64)0)
4164 return 0;
4165 banks += 4 * mce->bank;
4166
4167
4168
4169
4170 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
4171 return 0;
4172 if (mce->status & MCI_STATUS_UC) {
4173 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
4174 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
4175 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
4176 return 0;
4177 }
4178 if (banks[1] & MCI_STATUS_VAL)
4179 mce->status |= MCI_STATUS_OVER;
4180 banks[2] = mce->addr;
4181 banks[3] = mce->misc;
4182 vcpu->arch.mcg_status = mce->mcg_status;
4183 banks[1] = mce->status;
4184 kvm_queue_exception(vcpu, MC_VECTOR);
4185 } else if (!(banks[1] & MCI_STATUS_VAL)
4186 || !(banks[1] & MCI_STATUS_UC)) {
4187 if (banks[1] & MCI_STATUS_VAL)
4188 mce->status |= MCI_STATUS_OVER;
4189 banks[2] = mce->addr;
4190 banks[3] = mce->misc;
4191 banks[1] = mce->status;
4192 } else
4193 banks[1] |= MCI_STATUS_OVER;
4194 return 0;
4195}
4196
4197static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
4198 struct kvm_vcpu_events *events)
4199{
4200 process_nmi(vcpu);
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213 if (!vcpu->kvm->arch.exception_payload_enabled &&
4214 vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
4215 kvm_deliver_exception_payload(vcpu);
4216
	/*
	 * Software exceptions are not reported: the ABI has no field for the
	 * instruction length they need, and since the guest RIP was not
	 * advanced the same exception will simply be re-generated when the
	 * vCPU runs again.
	 */
4223 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
4224 events->exception.injected = 0;
4225 events->exception.pending = 0;
4226 } else {
4227 events->exception.injected = vcpu->arch.exception.injected;
4228 events->exception.pending = vcpu->arch.exception.pending;
4229
4230
4231
4232
4233
4234 if (!vcpu->kvm->arch.exception_payload_enabled)
4235 events->exception.injected |=
4236 vcpu->arch.exception.pending;
4237 }
4238 events->exception.nr = vcpu->arch.exception.nr;
4239 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
4240 events->exception.error_code = vcpu->arch.exception.error_code;
4241 events->exception_has_payload = vcpu->arch.exception.has_payload;
4242 events->exception_payload = vcpu->arch.exception.payload;
4243
4244 events->interrupt.injected =
4245 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
4246 events->interrupt.nr = vcpu->arch.interrupt.nr;
4247 events->interrupt.soft = 0;
4248 events->interrupt.shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
4249
4250 events->nmi.injected = vcpu->arch.nmi_injected;
4251 events->nmi.pending = vcpu->arch.nmi_pending != 0;
4252 events->nmi.masked = kvm_x86_ops.get_nmi_mask(vcpu);
4253 events->nmi.pad = 0;
4254
4255 events->sipi_vector = 0;
4256
4257 events->smi.smm = is_smm(vcpu);
4258 events->smi.pending = vcpu->arch.smi_pending;
4259 events->smi.smm_inside_nmi =
4260 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
4261 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
4262
4263 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
4264 | KVM_VCPUEVENT_VALID_SHADOW
4265 | KVM_VCPUEVENT_VALID_SMM);
4266 if (vcpu->kvm->arch.exception_payload_enabled)
4267 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
4268
4269 memset(&events->reserved, 0, sizeof(events->reserved));
4270}
4271
4272static void kvm_smm_changed(struct kvm_vcpu *vcpu);
4273
4274static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
4275 struct kvm_vcpu_events *events)
4276{
4277 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
4278 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
4279 | KVM_VCPUEVENT_VALID_SHADOW
4280 | KVM_VCPUEVENT_VALID_SMM
4281 | KVM_VCPUEVENT_VALID_PAYLOAD))
4282 return -EINVAL;
4283
4284 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
4285 if (!vcpu->kvm->arch.exception_payload_enabled)
4286 return -EINVAL;
4287 if (events->exception.pending)
4288 events->exception.injected = 0;
4289 else
4290 events->exception_has_payload = 0;
4291 } else {
4292 events->exception.pending = 0;
4293 events->exception_has_payload = 0;
4294 }
4295
4296 if ((events->exception.injected || events->exception.pending) &&
4297 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
4298 return -EINVAL;
4299
4300
4301 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
4302 (events->smi.smm || events->smi.pending) &&
4303 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4304 return -EINVAL;
4305
4306 process_nmi(vcpu);
4307 vcpu->arch.exception.injected = events->exception.injected;
4308 vcpu->arch.exception.pending = events->exception.pending;
4309 vcpu->arch.exception.nr = events->exception.nr;
4310 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
4311 vcpu->arch.exception.error_code = events->exception.error_code;
4312 vcpu->arch.exception.has_payload = events->exception_has_payload;
4313 vcpu->arch.exception.payload = events->exception_payload;
4314
4315 vcpu->arch.interrupt.injected = events->interrupt.injected;
4316 vcpu->arch.interrupt.nr = events->interrupt.nr;
4317 vcpu->arch.interrupt.soft = events->interrupt.soft;
4318 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
4319 kvm_x86_ops.set_interrupt_shadow(vcpu,
4320 events->interrupt.shadow);
4321
4322 vcpu->arch.nmi_injected = events->nmi.injected;
4323 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
4324 vcpu->arch.nmi_pending = events->nmi.pending;
4325 kvm_x86_ops.set_nmi_mask(vcpu, events->nmi.masked);
4326
4327 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
4328 lapic_in_kernel(vcpu))
4329 vcpu->arch.apic->sipi_vector = events->sipi_vector;
4330
4331 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
4332 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
4333 if (events->smi.smm)
4334 vcpu->arch.hflags |= HF_SMM_MASK;
4335 else
4336 vcpu->arch.hflags &= ~HF_SMM_MASK;
4337 kvm_smm_changed(vcpu);
4338 }
4339
4340 vcpu->arch.smi_pending = events->smi.pending;
4341
4342 if (events->smi.smm) {
4343 if (events->smi.smm_inside_nmi)
4344 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
4345 else
4346 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
4347 }
4348
4349 if (lapic_in_kernel(vcpu)) {
4350 if (events->smi.latched_init)
4351 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4352 else
4353 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4354 }
4355 }
4356
4357 kvm_make_request(KVM_REQ_EVENT, vcpu);
4358
4359 return 0;
4360}
4361
4362static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
4363 struct kvm_debugregs *dbgregs)
4364{
4365 unsigned long val;
4366
4367 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
4368 kvm_get_dr(vcpu, 6, &val);
4369 dbgregs->dr6 = val;
4370 dbgregs->dr7 = vcpu->arch.dr7;
4371 dbgregs->flags = 0;
4372 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
4373}
4374
4375static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
4376 struct kvm_debugregs *dbgregs)
4377{
4378 if (dbgregs->flags)
4379 return -EINVAL;
4380
4381 if (dbgregs->dr6 & ~0xffffffffull)
4382 return -EINVAL;
4383 if (dbgregs->dr7 & ~0xffffffffull)
4384 return -EINVAL;
4385
4386 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
4387 kvm_update_dr0123(vcpu);
4388 vcpu->arch.dr6 = dbgregs->dr6;
4389 vcpu->arch.dr7 = dbgregs->dr7;
4390 kvm_update_dr7(vcpu);
4391
4392 return 0;
4393}
4394
4395#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
4396
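/*
 * Serialize the guest FPU state into the non-compacted XSAVE layout that the
 * KVM_GET_XSAVE ABI expects: the legacy region is copied as-is, and each
 * extended feature is copied from its (possibly compacted) kernel offset to
 * the architectural offset reported by CPUID leaf 0xD.  PKRU is taken from
 * vcpu->arch.pkru rather than from the saved XSAVE image.
 */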
4397static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
4398{
4399 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
4400 u64 xstate_bv = xsave->header.xfeatures;
4401 u64 valid;
4402
4403
4404
4405
4406
4407 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
4408
4409
4410 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
4411 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
4412
4413
4414
4415
4416
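	/*
	 * Copy each enabled feature from its (possibly compacted) offset in
	 * the kernel buffer to its architectural, non-compacted offset in the
	 * destination, as reported by CPUID leaf 0xD.
	 */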
4417 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4418 while (valid) {
4419 u64 xfeature_mask = valid & -valid;
4420 int xfeature_nr = fls64(xfeature_mask) - 1;
4421 void *src = get_xsave_addr(xsave, xfeature_nr);
4422
4423 if (src) {
4424 u32 size, offset, ecx, edx;
4425 cpuid_count(XSTATE_CPUID, xfeature_nr,
4426 &size, &offset, &ecx, &edx);
4427 if (xfeature_nr == XFEATURE_PKRU)
4428 memcpy(dest + offset, &vcpu->arch.pkru,
4429 sizeof(vcpu->arch.pkru));
4430 else
4431 memcpy(dest + offset, src, size);
4432
4433 }
4434
4435 valid -= xfeature_mask;
4436 }
4437}
4438
4439static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
4440{
4441 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
4442 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
4443 u64 valid;
4444
4445
4446
4447
4448
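	/*
	 * Restore the legacy FP/SSE area up to the XSAVE header as-is; the
	 * remaining features are copied one by one below.
	 */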
4449 memcpy(xsave, src, XSAVE_HDR_OFFSET);
4450
4451
4452 xsave->header.xfeatures = xstate_bv;
4453 if (boot_cpu_has(X86_FEATURE_XSAVES))
4454 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
4455
4456
4457
4458
4459
4460 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4461 while (valid) {
4462 u64 xfeature_mask = valid & -valid;
4463 int xfeature_nr = fls64(xfeature_mask) - 1;
4464 void *dest = get_xsave_addr(xsave, xfeature_nr);
4465
4466 if (dest) {
4467 u32 size, offset, ecx, edx;
4468 cpuid_count(XSTATE_CPUID, xfeature_nr,
4469 &size, &offset, &ecx, &edx);
4470 if (xfeature_nr == XFEATURE_PKRU)
4471 memcpy(&vcpu->arch.pkru, src + offset,
4472 sizeof(vcpu->arch.pkru));
4473 else
4474 memcpy(dest, src + offset, size);
4475 }
4476
4477 valid -= xfeature_mask;
4478 }
4479}
4480
4481static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
4482 struct kvm_xsave *guest_xsave)
4483{
4484 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4485 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
4486 fill_xsave((u8 *) guest_xsave->region, vcpu);
4487 } else {
4488 memcpy(guest_xsave->region,
4489 &vcpu->arch.guest_fpu->state.fxsave,
4490 sizeof(struct fxregs_state));
4491 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
4492 XFEATURE_MASK_FPSSE;
4493 }
4494}
4495
4496#define XSAVE_MXCSR_OFFSET 24
4497
4498static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
4499 struct kvm_xsave *guest_xsave)
4500{
4501 u64 xstate_bv =
4502 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
4503 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
4504
4505 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4506
4507
4508
4509
4510
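		/*
		 * Reject feature bits the host cannot handle at all and
		 * reserved MXCSR bits; bits merely absent from the guest's
		 * CPUID are still accepted for compatibility with older
		 * userspace.
		 */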
4511 if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
4512 return -EINVAL;
4513 load_xsave(vcpu, (u8 *)guest_xsave->region);
4514 } else {
4515 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
4516 mxcsr & ~mxcsr_feature_mask)
4517 return -EINVAL;
4518 memcpy(&vcpu->arch.guest_fpu->state.fxsave,
4519 guest_xsave->region, sizeof(struct fxregs_state));
4520 }
4521 return 0;
4522}
4523
4524static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
4525 struct kvm_xcrs *guest_xcrs)
4526{
4527 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
4528 guest_xcrs->nr_xcrs = 0;
4529 return;
4530 }
4531
4532 guest_xcrs->nr_xcrs = 1;
4533 guest_xcrs->flags = 0;
4534 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
4535 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
4536}
4537
4538static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
4539 struct kvm_xcrs *guest_xcrs)
4540{
4541 int i, r = 0;
4542
4543 if (!boot_cpu_has(X86_FEATURE_XSAVE))
4544 return -EINVAL;
4545
4546 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
4547 return -EINVAL;
4548
4549 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
4550
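		/* Only XCR0 is currently supported. */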
4551 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
4552 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
4553 guest_xcrs->xcrs[i].value);
4554 break;
4555 }
4556 if (r)
4557 r = -EINVAL;
4558 return r;
4559}
4560
4561
4562
4563
4564
4565
4566
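/*
 * kvm_set_guest_paused() indicates to the guest kernel that it has been
 * stopped by the hypervisor, so kvmclock-based watchdogs can ignore the
 * missing time.  -EINVAL is returned if pvclock is not enabled for this vCPU.
 */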
4567static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
4568{
4569 if (!vcpu->arch.pv_time_enabled)
4570 return -EINVAL;
4571 vcpu->arch.pvclock_set_guest_stopped_request = true;
4572 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4573 return 0;
4574}
4575
4576static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4577 struct kvm_enable_cap *cap)
4578{
4579 int r;
4580 uint16_t vmcs_version;
4581 void __user *user_ptr;
4582
4583 if (cap->flags)
4584 return -EINVAL;
4585
4586 switch (cap->cap) {
4587 case KVM_CAP_HYPERV_SYNIC2:
4588 if (cap->args[0])
4589 return -EINVAL;
4590 fallthrough;
4591
4592 case KVM_CAP_HYPERV_SYNIC:
4593 if (!irqchip_in_kernel(vcpu->kvm))
4594 return -EINVAL;
4595 return kvm_hv_activate_synic(vcpu, cap->cap ==
4596 KVM_CAP_HYPERV_SYNIC2);
4597 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4598 if (!kvm_x86_ops.nested_ops->enable_evmcs)
4599 return -ENOTTY;
4600 r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
4601 if (!r) {
4602 user_ptr = (void __user *)(uintptr_t)cap->args[0];
4603 if (copy_to_user(user_ptr, &vmcs_version,
4604 sizeof(vmcs_version)))
4605 r = -EFAULT;
4606 }
4607 return r;
4608 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4609 if (!kvm_x86_ops.enable_direct_tlbflush)
4610 return -ENOTTY;
4611
4612 return kvm_x86_ops.enable_direct_tlbflush(vcpu);
4613
4614 case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
4615 vcpu->arch.pv_cpuid.enforce = cap->args[0];
4616 if (vcpu->arch.pv_cpuid.enforce)
4617 kvm_update_pv_runtime(vcpu);
4618
4619 return 0;
4620
4621 default:
4622 return -EINVAL;
4623 }
4624}
4625
4626long kvm_arch_vcpu_ioctl(struct file *filp,
4627 unsigned int ioctl, unsigned long arg)
4628{
4629 struct kvm_vcpu *vcpu = filp->private_data;
4630 void __user *argp = (void __user *)arg;
4631 int r;
4632 union {
4633 struct kvm_lapic_state *lapic;
4634 struct kvm_xsave *xsave;
4635 struct kvm_xcrs *xcrs;
4636 void *buffer;
4637 } u;
4638
4639 vcpu_load(vcpu);
4640
4641 u.buffer = NULL;
4642 switch (ioctl) {
4643 case KVM_GET_LAPIC: {
4644 r = -EINVAL;
4645 if (!lapic_in_kernel(vcpu))
4646 goto out;
4647 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
4648 GFP_KERNEL_ACCOUNT);
4649
4650 r = -ENOMEM;
4651 if (!u.lapic)
4652 goto out;
4653 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
4654 if (r)
4655 goto out;
4656 r = -EFAULT;
4657 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
4658 goto out;
4659 r = 0;
4660 break;
4661 }
4662 case KVM_SET_LAPIC: {
4663 r = -EINVAL;
4664 if (!lapic_in_kernel(vcpu))
4665 goto out;
4666 u.lapic = memdup_user(argp, sizeof(*u.lapic));
4667 if (IS_ERR(u.lapic)) {
4668 r = PTR_ERR(u.lapic);
4669 goto out_nofree;
4670 }
4671
4672 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
4673 break;
4674 }
4675 case KVM_INTERRUPT: {
4676 struct kvm_interrupt irq;
4677
4678 r = -EFAULT;
4679 if (copy_from_user(&irq, argp, sizeof(irq)))
4680 goto out;
4681 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
4682 break;
4683 }
4684 case KVM_NMI: {
4685 r = kvm_vcpu_ioctl_nmi(vcpu);
4686 break;
4687 }
4688 case KVM_SMI: {
4689 r = kvm_vcpu_ioctl_smi(vcpu);
4690 break;
4691 }
4692 case KVM_SET_CPUID: {
4693 struct kvm_cpuid __user *cpuid_arg = argp;
4694 struct kvm_cpuid cpuid;
4695
4696 r = -EFAULT;
4697 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4698 goto out;
4699 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4700 break;
4701 }
4702 case KVM_SET_CPUID2: {
4703 struct kvm_cpuid2 __user *cpuid_arg = argp;
4704 struct kvm_cpuid2 cpuid;
4705
4706 r = -EFAULT;
4707 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4708 goto out;
4709 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
4710 cpuid_arg->entries);
4711 break;
4712 }
4713 case KVM_GET_CPUID2: {
4714 struct kvm_cpuid2 __user *cpuid_arg = argp;
4715 struct kvm_cpuid2 cpuid;
4716
4717 r = -EFAULT;
4718 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4719 goto out;
4720 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
4721 cpuid_arg->entries);
4722 if (r)
4723 goto out;
4724 r = -EFAULT;
4725 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4726 goto out;
4727 r = 0;
4728 break;
4729 }
4730 case KVM_GET_MSRS: {
4731 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4732 r = msr_io(vcpu, argp, do_get_msr, 1);
4733 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4734 break;
4735 }
4736 case KVM_SET_MSRS: {
4737 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4738 r = msr_io(vcpu, argp, do_set_msr, 0);
4739 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4740 break;
4741 }
4742 case KVM_TPR_ACCESS_REPORTING: {
4743 struct kvm_tpr_access_ctl tac;
4744
4745 r = -EFAULT;
4746 if (copy_from_user(&tac, argp, sizeof(tac)))
4747 goto out;
4748 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
4749 if (r)
4750 goto out;
4751 r = -EFAULT;
4752 if (copy_to_user(argp, &tac, sizeof(tac)))
4753 goto out;
4754 r = 0;
4755 break;
4756	}
4757 case KVM_SET_VAPIC_ADDR: {
4758 struct kvm_vapic_addr va;
4759 int idx;
4760
4761 r = -EINVAL;
4762 if (!lapic_in_kernel(vcpu))
4763 goto out;
4764 r = -EFAULT;
4765 if (copy_from_user(&va, argp, sizeof(va)))
4766 goto out;
4767 idx = srcu_read_lock(&vcpu->kvm->srcu);
4768 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
4769 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4770 break;
4771 }
4772 case KVM_X86_SETUP_MCE: {
4773 u64 mcg_cap;
4774
4775 r = -EFAULT;
4776 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
4777 goto out;
4778 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
4779 break;
4780 }
4781 case KVM_X86_SET_MCE: {
4782 struct kvm_x86_mce mce;
4783
4784 r = -EFAULT;
4785 if (copy_from_user(&mce, argp, sizeof(mce)))
4786 goto out;
4787 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
4788 break;
4789 }
4790 case KVM_GET_VCPU_EVENTS: {
4791 struct kvm_vcpu_events events;
4792
4793 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
4794
4795 r = -EFAULT;
4796 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
4797 break;
4798 r = 0;
4799 break;
4800 }
4801 case KVM_SET_VCPU_EVENTS: {
4802 struct kvm_vcpu_events events;
4803
4804 r = -EFAULT;
4805 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
4806 break;
4807
4808 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
4809 break;
4810 }
4811 case KVM_GET_DEBUGREGS: {
4812 struct kvm_debugregs dbgregs;
4813
4814 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
4815
4816 r = -EFAULT;
4817 if (copy_to_user(argp, &dbgregs,
4818 sizeof(struct kvm_debugregs)))
4819 break;
4820 r = 0;
4821 break;
4822 }
4823 case KVM_SET_DEBUGREGS: {
4824 struct kvm_debugregs dbgregs;
4825
4826 r = -EFAULT;
4827 if (copy_from_user(&dbgregs, argp,
4828 sizeof(struct kvm_debugregs)))
4829 break;
4830
4831 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
4832 break;
4833 }
4834 case KVM_GET_XSAVE: {
4835 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
4836 r = -ENOMEM;
4837 if (!u.xsave)
4838 break;
4839
4840 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
4841
4842 r = -EFAULT;
4843 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
4844 break;
4845 r = 0;
4846 break;
4847 }
4848 case KVM_SET_XSAVE: {
4849 u.xsave = memdup_user(argp, sizeof(*u.xsave));
4850 if (IS_ERR(u.xsave)) {
4851 r = PTR_ERR(u.xsave);
4852 goto out_nofree;
4853 }
4854
4855 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
4856 break;
4857 }
4858 case KVM_GET_XCRS: {
4859 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
4860 r = -ENOMEM;
4861 if (!u.xcrs)
4862 break;
4863
4864 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
4865
4866 r = -EFAULT;
4867 if (copy_to_user(argp, u.xcrs,
4868 sizeof(struct kvm_xcrs)))
4869 break;
4870 r = 0;
4871 break;
4872 }
4873 case KVM_SET_XCRS: {
4874 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
4875 if (IS_ERR(u.xcrs)) {
4876 r = PTR_ERR(u.xcrs);
4877 goto out_nofree;
4878 }
4879
4880 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
4881 break;
4882 }
4883 case KVM_SET_TSC_KHZ: {
4884 u32 user_tsc_khz;
4885
4886 r = -EINVAL;
4887 user_tsc_khz = (u32)arg;
4888
4889 if (kvm_has_tsc_control &&
4890 user_tsc_khz >= kvm_max_guest_tsc_khz)
4891 goto out;
4892
4893 if (user_tsc_khz == 0)
4894 user_tsc_khz = tsc_khz;
4895
4896 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
4897 r = 0;
4898
4899 goto out;
4900 }
4901 case KVM_GET_TSC_KHZ: {
4902 r = vcpu->arch.virtual_tsc_khz;
4903 goto out;
4904 }
4905 case KVM_KVMCLOCK_CTRL: {
4906 r = kvm_set_guest_paused(vcpu);
4907 goto out;
4908 }
4909 case KVM_ENABLE_CAP: {
4910 struct kvm_enable_cap cap;
4911
4912 r = -EFAULT;
4913 if (copy_from_user(&cap, argp, sizeof(cap)))
4914 goto out;
4915 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4916 break;
4917 }
4918 case KVM_GET_NESTED_STATE: {
4919 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4920 u32 user_data_size;
4921
4922 r = -EINVAL;
4923 if (!kvm_x86_ops.nested_ops->get_state)
4924 break;
4925
4926 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
4927 r = -EFAULT;
4928 if (get_user(user_data_size, &user_kvm_nested_state->size))
4929 break;
4930
4931 r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
4932 user_data_size);
4933 if (r < 0)
4934 break;
4935
4936 if (r > user_data_size) {
4937 if (put_user(r, &user_kvm_nested_state->size))
4938 r = -EFAULT;
4939 else
4940 r = -E2BIG;
4941 break;
4942 }
4943
4944 r = 0;
4945 break;
4946 }
4947 case KVM_SET_NESTED_STATE: {
4948 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4949 struct kvm_nested_state kvm_state;
4950 int idx;
4951
4952 r = -EINVAL;
4953 if (!kvm_x86_ops.nested_ops->set_state)
4954 break;
4955
4956 r = -EFAULT;
4957 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
4958 break;
4959
4960 r = -EINVAL;
4961 if (kvm_state.size < sizeof(kvm_state))
4962 break;
4963
4964 if (kvm_state.flags &
4965 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
4966 | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
4967 | KVM_STATE_NESTED_GIF_SET))
4968 break;
4969
4970
4971 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
4972 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
4973 break;
4974
4975 idx = srcu_read_lock(&vcpu->kvm->srcu);
4976 r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
4977 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4978 break;
4979 }
4980 case KVM_GET_SUPPORTED_HV_CPUID: {
4981 struct kvm_cpuid2 __user *cpuid_arg = argp;
4982 struct kvm_cpuid2 cpuid;
4983
4984 r = -EFAULT;
4985 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4986 goto out;
4987
4988 r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
4989 cpuid_arg->entries);
4990 if (r)
4991 goto out;
4992
4993 r = -EFAULT;
4994 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4995 goto out;
4996 r = 0;
4997 break;
4998 }
4999 default:
5000 r = -EINVAL;
5001 }
5002out:
5003 kfree(u.buffer);
5004out_nofree:
5005 vcpu_put(vcpu);
5006 return r;
5007}
5008
5009vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5010{
5011 return VM_FAULT_SIGBUS;
5012}
5013
5014static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
5015{
5016 int ret;
5017
5018 if (addr > (unsigned int)(-3 * PAGE_SIZE))
5019 return -EINVAL;
5020 ret = kvm_x86_ops.set_tss_addr(kvm, addr);
5021 return ret;
5022}
5023
5024static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
5025 u64 ident_addr)
5026{
5027 return kvm_x86_ops.set_identity_map_addr(kvm, ident_addr);
5028}
5029
5030static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
5031 unsigned long kvm_nr_mmu_pages)
5032{
5033 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
5034 return -EINVAL;
5035
5036 mutex_lock(&kvm->slots_lock);
5037
5038 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
5039 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
5040
5041 mutex_unlock(&kvm->slots_lock);
5042 return 0;
5043}
5044
5045static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
5046{
5047 return kvm->arch.n_max_mmu_pages;
5048}
5049
5050static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5051{
5052 struct kvm_pic *pic = kvm->arch.vpic;
5053 int r;
5054
5055 r = 0;
5056 switch (chip->chip_id) {
5057 case KVM_IRQCHIP_PIC_MASTER:
5058 memcpy(&chip->chip.pic, &pic->pics[0],
5059 sizeof(struct kvm_pic_state));
5060 break;
5061 case KVM_IRQCHIP_PIC_SLAVE:
5062 memcpy(&chip->chip.pic, &pic->pics[1],
5063 sizeof(struct kvm_pic_state));
5064 break;
5065 case KVM_IRQCHIP_IOAPIC:
5066 kvm_get_ioapic(kvm, &chip->chip.ioapic);
5067 break;
5068 default:
5069 r = -EINVAL;
5070 break;
5071 }
5072 return r;
5073}
5074
5075static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
5076{
5077 struct kvm_pic *pic = kvm->arch.vpic;
5078 int r;
5079
5080 r = 0;
5081 switch (chip->chip_id) {
5082 case KVM_IRQCHIP_PIC_MASTER:
5083 spin_lock(&pic->lock);
5084 memcpy(&pic->pics[0], &chip->chip.pic,
5085 sizeof(struct kvm_pic_state));
5086 spin_unlock(&pic->lock);
5087 break;
5088 case KVM_IRQCHIP_PIC_SLAVE:
5089 spin_lock(&pic->lock);
5090 memcpy(&pic->pics[1], &chip->chip.pic,
5091 sizeof(struct kvm_pic_state));
5092 spin_unlock(&pic->lock);
5093 break;
5094 case KVM_IRQCHIP_IOAPIC:
5095 kvm_set_ioapic(kvm, &chip->chip.ioapic);
5096 break;
5097 default:
5098 r = -EINVAL;
5099 break;
5100 }
5101 kvm_pic_update_irq(pic);
5102 return r;
5103}
5104
5105static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5106{
5107 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
5108
5109 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
5110
5111 mutex_lock(&kps->lock);
5112 memcpy(ps, &kps->channels, sizeof(*ps));
5113 mutex_unlock(&kps->lock);
5114 return 0;
5115}
5116
5117static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
5118{
5119 int i;
5120 struct kvm_pit *pit = kvm->arch.vpit;
5121
5122 mutex_lock(&pit->pit_state.lock);
5123 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
5124 for (i = 0; i < 3; i++)
5125 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
5126 mutex_unlock(&pit->pit_state.lock);
5127 return 0;
5128}
5129
5130static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5131{
5132 mutex_lock(&kvm->arch.vpit->pit_state.lock);
5133 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
5134 sizeof(ps->channels));
5135 ps->flags = kvm->arch.vpit->pit_state.flags;
5136 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
5137 memset(&ps->reserved, 0, sizeof(ps->reserved));
5138 return 0;
5139}
5140
5141static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
5142{
5143 int start = 0;
5144 int i;
5145 u32 prev_legacy, cur_legacy;
5146 struct kvm_pit *pit = kvm->arch.vpit;
5147
5148 mutex_lock(&pit->pit_state.lock);
5149 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
5150 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
5151 if (!prev_legacy && cur_legacy)
5152 start = 1;
5153 memcpy(&pit->pit_state.channels, &ps->channels,
5154 sizeof(pit->pit_state.channels));
5155 pit->pit_state.flags = ps->flags;
5156 for (i = 0; i < 3; i++)
5157 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
5158 start && i == 0);
5159 mutex_unlock(&pit->pit_state.lock);
5160 return 0;
5161}
5162
5163static int kvm_vm_ioctl_reinject(struct kvm *kvm,
5164 struct kvm_reinject_control *control)
5165{
5166 struct kvm_pit *pit = kvm->arch.vpit;
5167
5168
5169
5170
5171
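	/*
	 * Take pit_state.lock so the reinject mode cannot change underneath
	 * concurrent PIT ioctls.
	 */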
5172 mutex_lock(&pit->pit_state.lock);
5173 kvm_pit_set_reinject(pit, control->pit_reinject);
5174 mutex_unlock(&pit->pit_state.lock);
5175
5176 return 0;
5177}
5178
5179void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
5180{
5181
5182
5183
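	/*
	 * Flush any dirty-page state the hardware may still be buffering
	 * (e.g. VMX page-modification logging) into the dirty bitmaps.
	 */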
5184 if (kvm_x86_ops.flush_log_dirty)
5185 kvm_x86_ops.flush_log_dirty(kvm);
5186}
5187
5188int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
5189 bool line_status)
5190{
5191 if (!irqchip_in_kernel(kvm))
5192 return -ENXIO;
5193
5194 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
5195 irq_event->irq, irq_event->level,
5196 line_status);
5197 return 0;
5198}
5199
5200int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
5201 struct kvm_enable_cap *cap)
5202{
5203 int r;
5204
5205 if (cap->flags)
5206 return -EINVAL;
5207
5208 switch (cap->cap) {
5209 case KVM_CAP_DISABLE_QUIRKS:
5210 kvm->arch.disabled_quirks = cap->args[0];
5211 r = 0;
5212 break;
5213 case KVM_CAP_SPLIT_IRQCHIP: {
5214 mutex_lock(&kvm->lock);
5215 r = -EINVAL;
5216 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
5217 goto split_irqchip_unlock;
5218 r = -EEXIST;
5219 if (irqchip_in_kernel(kvm))
5220 goto split_irqchip_unlock;
5221 if (kvm->created_vcpus)
5222 goto split_irqchip_unlock;
5223 r = kvm_setup_empty_irq_routing(kvm);
5224 if (r)
5225 goto split_irqchip_unlock;
5226
5227 smp_wmb();
5228 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
5229 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
5230 r = 0;
5231split_irqchip_unlock:
5232 mutex_unlock(&kvm->lock);
5233 break;
5234 }
5235 case KVM_CAP_X2APIC_API:
5236 r = -EINVAL;
5237 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
5238 break;
5239
5240 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
5241 kvm->arch.x2apic_format = true;
5242 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
5243 kvm->arch.x2apic_broadcast_quirk_disabled = true;
5244
5245 r = 0;
5246 break;
5247 case KVM_CAP_X86_DISABLE_EXITS:
5248 r = -EINVAL;
5249 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
5250 break;
5251
5252 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
5253 kvm_can_mwait_in_guest())
5254 kvm->arch.mwait_in_guest = true;
5255 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
5256 kvm->arch.hlt_in_guest = true;
5257 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
5258 kvm->arch.pause_in_guest = true;
5259 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
5260 kvm->arch.cstate_in_guest = true;
5261 r = 0;
5262 break;
5263 case KVM_CAP_MSR_PLATFORM_INFO:
5264 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
5265 r = 0;
5266 break;
5267 case KVM_CAP_EXCEPTION_PAYLOAD:
5268 kvm->arch.exception_payload_enabled = cap->args[0];
5269 r = 0;
5270 break;
5271 case KVM_CAP_X86_USER_SPACE_MSR:
5272 kvm->arch.user_space_msr_mask = cap->args[0];
5273 r = 0;
5274 break;
5275 default:
5276 r = -EINVAL;
5277 break;
5278 }
5279 return r;
5280}
5281
5282static void kvm_clear_msr_filter(struct kvm *kvm)
5283{
5284 u32 i;
5285 u32 count = kvm->arch.msr_filter.count;
5286 struct msr_bitmap_range ranges[16];
5287
5288 mutex_lock(&kvm->lock);
5289 kvm->arch.msr_filter.count = 0;
5290 memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
5291 mutex_unlock(&kvm->lock);
5292 synchronize_srcu(&kvm->srcu);
5293
5294 for (i = 0; i < count; i++)
5295 kfree(ranges[i].bitmap);
5296}
5297
5298static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
5299{
5300 struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
5301 struct msr_bitmap_range range;
5302 unsigned long *bitmap = NULL;
5303 size_t bitmap_size;
5304 int r;
5305
5306 if (!user_range->nmsrs)
5307 return 0;
5308
5309 bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
5310 if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
5311 return -EINVAL;
5312
5313 bitmap = memdup_user((__user u8*)user_range->bitmap, bitmap_size);
5314 if (IS_ERR(bitmap))
5315 return PTR_ERR(bitmap);
5316
5317 range = (struct msr_bitmap_range) {
5318 .flags = user_range->flags,
5319 .base = user_range->base,
5320 .nmsrs = user_range->nmsrs,
5321 .bitmap = bitmap,
5322 };
5323
5324 if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
5325 r = -EINVAL;
5326 goto err;
5327 }
5328
5329 if (!range.flags) {
5330 r = -EINVAL;
5331 goto err;
5332 }
5333
5334
5335 ranges[kvm->arch.msr_filter.count] = range;
5336
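	/* Make sure the filled-in range is visible before the count is bumped. */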
5337 smp_wmb();
5338 kvm->arch.msr_filter.count++;
5339
5340 return 0;
5341err:
5342 kfree(bitmap);
5343 return r;
5344}
5345
5346static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
5347{
5348 struct kvm_msr_filter __user *user_msr_filter = argp;
5349 struct kvm_msr_filter filter;
5350 bool default_allow;
5351 int r = 0;
5352 bool empty = true;
5353 u32 i;
5354
5355 if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
5356 return -EFAULT;
5357
5358 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
5359 empty &= !filter.ranges[i].nmsrs;
5360
5361 default_allow = !(filter.flags & KVM_MSR_FILTER_DEFAULT_DENY);
5362 if (empty && !default_allow)
5363 return -EINVAL;
5364
5365 kvm_clear_msr_filter(kvm);
5366
5367 kvm->arch.msr_filter.default_allow = default_allow;
5368
5369
5370
5371
5372
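	/*
	 * Serialize concurrent updates so msr_filter.count and the ranges it
	 * covers are never observed in an inconsistent state.
	 */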
5373 mutex_lock(&kvm->lock);
5374 for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
5375 r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
5376 if (r)
5377 break;
5378 }
5379
5380 kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
5381 mutex_unlock(&kvm->lock);
5382
5383 return r;
5384}
5385
5386long kvm_arch_vm_ioctl(struct file *filp,
5387 unsigned int ioctl, unsigned long arg)
5388{
5389 struct kvm *kvm = filp->private_data;
5390 void __user *argp = (void __user *)arg;
5391 int r = -ENOTTY;
5392
5393
5394
5395
5396
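	/*
	 * The per-ioctl payloads below are mutually exclusive, so let them
	 * share a single slot on the stack.
	 */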
5397 union {
5398 struct kvm_pit_state ps;
5399 struct kvm_pit_state2 ps2;
5400 struct kvm_pit_config pit_config;
5401 } u;
5402
5403 switch (ioctl) {
5404 case KVM_SET_TSS_ADDR:
5405 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
5406 break;
5407 case KVM_SET_IDENTITY_MAP_ADDR: {
5408 u64 ident_addr;
5409
5410 mutex_lock(&kvm->lock);
5411 r = -EINVAL;
5412 if (kvm->created_vcpus)
5413 goto set_identity_unlock;
5414 r = -EFAULT;
5415 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
5416 goto set_identity_unlock;
5417 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
5418set_identity_unlock:
5419 mutex_unlock(&kvm->lock);
5420 break;
5421 }
5422 case KVM_SET_NR_MMU_PAGES:
5423 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
5424 break;
5425 case KVM_GET_NR_MMU_PAGES:
5426 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
5427 break;
5428 case KVM_CREATE_IRQCHIP: {
5429 mutex_lock(&kvm->lock);
5430
5431 r = -EEXIST;
5432 if (irqchip_in_kernel(kvm))
5433 goto create_irqchip_unlock;
5434
5435 r = -EINVAL;
5436 if (kvm->created_vcpus)
5437 goto create_irqchip_unlock;
5438
5439 r = kvm_pic_init(kvm);
5440 if (r)
5441 goto create_irqchip_unlock;
5442
5443 r = kvm_ioapic_init(kvm);
5444 if (r) {
5445 kvm_pic_destroy(kvm);
5446 goto create_irqchip_unlock;
5447 }
5448
5449 r = kvm_setup_default_irq_routing(kvm);
5450 if (r) {
5451 kvm_ioapic_destroy(kvm);
5452 kvm_pic_destroy(kvm);
5453 goto create_irqchip_unlock;
5454 }
5455
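		/* Write the IRQ routing before irqchip_in_kernel() can return true. */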
5456 smp_wmb();
5457 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
5458 create_irqchip_unlock:
5459 mutex_unlock(&kvm->lock);
5460 break;
5461 }
5462 case KVM_CREATE_PIT:
5463 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
5464 goto create_pit;
5465 case KVM_CREATE_PIT2:
5466 r = -EFAULT;
5467 if (copy_from_user(&u.pit_config, argp,
5468 sizeof(struct kvm_pit_config)))
5469 goto out;
5470 create_pit:
5471 mutex_lock(&kvm->lock);
5472 r = -EEXIST;
5473 if (kvm->arch.vpit)
5474 goto create_pit_unlock;
5475 r = -ENOMEM;
5476 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
5477 if (kvm->arch.vpit)
5478 r = 0;
5479 create_pit_unlock:
5480 mutex_unlock(&kvm->lock);
5481 break;
5482 case KVM_GET_IRQCHIP: {
5483
5484 struct kvm_irqchip *chip;
5485
5486 chip = memdup_user(argp, sizeof(*chip));
5487 if (IS_ERR(chip)) {
5488 r = PTR_ERR(chip);
5489 goto out;
5490 }
5491
5492 r = -ENXIO;
5493 if (!irqchip_kernel(kvm))
5494 goto get_irqchip_out;
5495 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
5496 if (r)
5497 goto get_irqchip_out;
5498 r = -EFAULT;
5499 if (copy_to_user(argp, chip, sizeof(*chip)))
5500 goto get_irqchip_out;
5501 r = 0;
5502 get_irqchip_out:
5503 kfree(chip);
5504 break;
5505 }
5506 case KVM_SET_IRQCHIP: {
5507
5508 struct kvm_irqchip *chip;
5509
5510 chip = memdup_user(argp, sizeof(*chip));
5511 if (IS_ERR(chip)) {
5512 r = PTR_ERR(chip);
5513 goto out;
5514 }
5515
5516 r = -ENXIO;
5517 if (!irqchip_kernel(kvm))
5518 goto set_irqchip_out;
5519 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
5520 set_irqchip_out:
5521 kfree(chip);
5522 break;
5523 }
5524 case KVM_GET_PIT: {
5525 r = -EFAULT;
5526 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
5527 goto out;
5528 r = -ENXIO;
5529 if (!kvm->arch.vpit)
5530 goto out;
5531 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
5532 if (r)
5533 goto out;
5534 r = -EFAULT;
5535 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
5536 goto out;
5537 r = 0;
5538 break;
5539 }
5540 case KVM_SET_PIT: {
5541 r = -EFAULT;
5542 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
5543 goto out;
5544 mutex_lock(&kvm->lock);
5545 r = -ENXIO;
5546 if (!kvm->arch.vpit)
5547 goto set_pit_out;
5548 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
5549set_pit_out:
5550 mutex_unlock(&kvm->lock);
5551 break;
5552 }
5553 case KVM_GET_PIT2: {
5554 r = -ENXIO;
5555 if (!kvm->arch.vpit)
5556 goto out;
5557 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
5558 if (r)
5559 goto out;
5560 r = -EFAULT;
5561 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
5562 goto out;
5563 r = 0;
5564 break;
5565 }
5566 case KVM_SET_PIT2: {
5567 r = -EFAULT;
5568 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
5569 goto out;
5570 mutex_lock(&kvm->lock);
5571 r = -ENXIO;
5572 if (!kvm->arch.vpit)
5573 goto set_pit2_out;
5574 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
5575set_pit2_out:
5576 mutex_unlock(&kvm->lock);
5577 break;
5578 }
5579 case KVM_REINJECT_CONTROL: {
5580 struct kvm_reinject_control control;
5581 r = -EFAULT;
5582 if (copy_from_user(&control, argp, sizeof(control)))
5583 goto out;
5584 r = -ENXIO;
5585 if (!kvm->arch.vpit)
5586 goto out;
5587 r = kvm_vm_ioctl_reinject(kvm, &control);
5588 break;
5589 }
5590 case KVM_SET_BOOT_CPU_ID:
5591 r = 0;
5592 mutex_lock(&kvm->lock);
5593 if (kvm->created_vcpus)
5594 r = -EBUSY;
5595 else
5596 kvm->arch.bsp_vcpu_id = arg;
5597 mutex_unlock(&kvm->lock);
5598 break;
5599 case KVM_XEN_HVM_CONFIG: {
5600 struct kvm_xen_hvm_config xhc;
5601 r = -EFAULT;
5602 if (copy_from_user(&xhc, argp, sizeof(xhc)))
5603 goto out;
5604 r = -EINVAL;
5605 if (xhc.flags)
5606 goto out;
5607 memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
5608 r = 0;
5609 break;
5610 }
5611 case KVM_SET_CLOCK: {
5612 struct kvm_clock_data user_ns;
5613 u64 now_ns;
5614
5615 r = -EFAULT;
5616 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
5617 goto out;
5618
5619 r = -EINVAL;
5620 if (user_ns.flags)
5621 goto out;
5622
5623 r = 0;
5624
5625
5626
5627
5628
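		/*
		 * Refresh the masterclock first so the new kvmclock_offset is
		 * computed against an up-to-date reading of the clock.
		 */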
5629 kvm_gen_update_masterclock(kvm);
5630 now_ns = get_kvmclock_ns(kvm);
5631 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
5632 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
5633 break;
5634 }
5635 case KVM_GET_CLOCK: {
5636 struct kvm_clock_data user_ns;
5637 u64 now_ns;
5638
5639 now_ns = get_kvmclock_ns(kvm);
5640 user_ns.clock = now_ns;
5641 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
5642 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
5643
5644 r = -EFAULT;
5645 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
5646 goto out;
5647 r = 0;
5648 break;
5649 }
5650 case KVM_MEMORY_ENCRYPT_OP: {
5651 r = -ENOTTY;
5652 if (kvm_x86_ops.mem_enc_op)
5653 r = kvm_x86_ops.mem_enc_op(kvm, argp);
5654 break;
5655 }
5656 case KVM_MEMORY_ENCRYPT_REG_REGION: {
5657 struct kvm_enc_region region;
5658
5659 r = -EFAULT;
5660		if (copy_from_user(&region, argp, sizeof(region)))
5661 goto out;
5662
5663 r = -ENOTTY;
5664 if (kvm_x86_ops.mem_enc_reg_region)
5665			r = kvm_x86_ops.mem_enc_reg_region(kvm, &region);
5666 break;
5667 }
5668 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
5669 struct kvm_enc_region region;
5670
5671 r = -EFAULT;
5672		if (copy_from_user(&region, argp, sizeof(region)))
5673 goto out;
5674
5675 r = -ENOTTY;
5676 if (kvm_x86_ops.mem_enc_unreg_region)
5677			r = kvm_x86_ops.mem_enc_unreg_region(kvm, &region);
5678 break;
5679 }
5680 case KVM_HYPERV_EVENTFD: {
5681 struct kvm_hyperv_eventfd hvevfd;
5682
5683 r = -EFAULT;
5684 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
5685 goto out;
5686 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
5687 break;
5688 }
5689 case KVM_SET_PMU_EVENT_FILTER:
5690 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
5691 break;
5692 case KVM_X86_SET_MSR_FILTER:
5693 r = kvm_vm_ioctl_set_msr_filter(kvm, argp);
5694 break;
5695 default:
5696 r = -ENOTTY;
5697 }
5698out:
5699 return r;
5700}
5701
5702static void kvm_init_msr_list(void)
5703{
5704 struct x86_pmu_capability x86_pmu;
5705 u32 dummy[2];
5706 unsigned i;
5707
5708 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
5709 "Please update the fixed PMCs in msrs_to_saved_all[]");
5710
5711 perf_get_x86_pmu_capability(&x86_pmu);
5712
5713 num_msrs_to_save = 0;
5714 num_emulated_msrs = 0;
5715 num_msr_based_features = 0;
5716
5717 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
5718 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
5719 continue;
5720
5721
5722
5723
5724
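		/*
		 * Even MSRs that are readable on the host may need to stay
		 * hidden from guests; filter the feature-dependent ones here.
		 */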
5725 switch (msrs_to_save_all[i]) {
5726 case MSR_IA32_BNDCFGS:
5727 if (!kvm_mpx_supported())
5728 continue;
5729 break;
5730 case MSR_TSC_AUX:
5731 if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
5732 continue;
5733 break;
5734 case MSR_IA32_UMWAIT_CONTROL:
5735 if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
5736 continue;
5737 break;
5738 case MSR_IA32_RTIT_CTL:
5739 case MSR_IA32_RTIT_STATUS:
5740 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
5741 continue;
5742 break;
5743 case MSR_IA32_RTIT_CR3_MATCH:
5744 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
5745 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
5746 continue;
5747 break;
5748 case MSR_IA32_RTIT_OUTPUT_BASE:
5749 case MSR_IA32_RTIT_OUTPUT_MASK:
5750 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
5751 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
5752 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
5753 continue;
5754 break;
5755 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
5756 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
5757 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
5758 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
5759 continue;
5760 break;
5761 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
5762 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
5763 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5764 continue;
5765 break;
5766 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
5767 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
5768 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5769 continue;
5770 break;
5771 default:
5772 break;
5773 }
5774
5775 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
5776 }
5777
5778 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
5779 if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
5780 continue;
5781
5782 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
5783 }
5784
5785 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
5786 struct kvm_msr_entry msr;
5787
5788 msr.index = msr_based_features_all[i];
5789 if (kvm_get_msr_feature(&msr))
5790 continue;
5791
5792 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
5793 }
5794}
5795
5796static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
5797 const void *v)
5798{
5799 int handled = 0;
5800 int n;
5801
5802 do {
5803 n = min(len, 8);
5804 if (!(lapic_in_kernel(vcpu) &&
5805 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
5806 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
5807 break;
5808 handled += n;
5809 addr += n;
5810 len -= n;
5811 v += n;
5812 } while (len);
5813
5814 return handled;
5815}
5816
5817static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
5818{
5819 int handled = 0;
5820 int n;
5821
5822 do {
5823 n = min(len, 8);
5824 if (!(lapic_in_kernel(vcpu) &&
5825 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
5826 addr, n, v))
5827 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
5828 break;
5829 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
5830 handled += n;
5831 addr += n;
5832 len -= n;
5833 v += n;
5834 } while (len);
5835
5836 return handled;
5837}
5838
5839static void kvm_set_segment(struct kvm_vcpu *vcpu,
5840 struct kvm_segment *var, int seg)
5841{
5842 kvm_x86_ops.set_segment(vcpu, var, seg);
5843}
5844
5845void kvm_get_segment(struct kvm_vcpu *vcpu,
5846 struct kvm_segment *var, int seg)
5847{
5848 kvm_x86_ops.get_segment(vcpu, var, seg);
5849}
5850
5851gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
5852 struct x86_exception *exception)
5853{
5854 gpa_t t_gpa;
5855
5856 BUG_ON(!mmu_is_nested(vcpu));
5857
5858
5859 access |= PFERR_USER_MASK;
5860 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
5861
5862 return t_gpa;
5863}
5864
5865gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
5866 struct x86_exception *exception)
5867{
5868 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5869 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5870}
5871
5872 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
5873 struct x86_exception *exception)
5874{
5875 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5876 access |= PFERR_FETCH_MASK;
5877 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5878}
5879
5880gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
5881 struct x86_exception *exception)
5882{
5883 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5884 access |= PFERR_WRITE_MASK;
5885 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5886}
5887
5888
5889gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
5890 struct x86_exception *exception)
5891{
5892 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
5893}
5894
5895static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5896 struct kvm_vcpu *vcpu, u32 access,
5897 struct x86_exception *exception)
5898{
5899 void *data = val;
5900 int r = X86EMUL_CONTINUE;
5901
5902 while (bytes) {
5903 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
5904 exception);
5905 unsigned offset = addr & (PAGE_SIZE-1);
5906 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
5907 int ret;
5908
5909 if (gpa == UNMAPPED_GVA)
5910 return X86EMUL_PROPAGATE_FAULT;
5911 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
5912 offset, toread);
5913 if (ret < 0) {
5914 r = X86EMUL_IO_NEEDED;
5915 goto out;
5916 }
5917
5918 bytes -= toread;
5919 data += toread;
5920 addr += toread;
5921 }
5922out:
5923 return r;
5924}
5925
5926
5927static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
5928 gva_t addr, void *val, unsigned int bytes,
5929 struct x86_exception *exception)
5930{
5931 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5932 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5933 unsigned offset;
5934 int ret;
5935
5936
5937 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
5938 exception);
5939 if (unlikely(gpa == UNMAPPED_GVA))
5940 return X86EMUL_PROPAGATE_FAULT;
5941
5942 offset = addr & (PAGE_SIZE-1);
5943 if (WARN_ON(offset + bytes > PAGE_SIZE))
5944 bytes = (unsigned)PAGE_SIZE - offset;
5945 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
5946 offset, bytes);
5947 if (unlikely(ret < 0))
5948 return X86EMUL_IO_NEEDED;
5949
5950 return X86EMUL_CONTINUE;
5951}
5952
5953int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
5954 gva_t addr, void *val, unsigned int bytes,
5955 struct x86_exception *exception)
5956{
5957 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5958
5959
5960
5961
5962
5963
5964
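	/*
	 * Clear the exception up front: callers tend to inject it blindly on
	 * failure, and an uninitialized struct could leak kernel stack data
	 * into the guest.
	 */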
5965 memset(exception, 0, sizeof(*exception));
5966 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
5967 exception);
5968}
5969EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
5970
5971static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
5972 gva_t addr, void *val, unsigned int bytes,
5973 struct x86_exception *exception, bool system)
5974{
5975 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5976 u32 access = 0;
5977
5978 if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
5979 access |= PFERR_USER_MASK;
5980
5981 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
5982}
5983
5984static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
5985 unsigned long addr, void *val, unsigned int bytes)
5986{
5987 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5988 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
5989
5990 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
5991}
5992
5993static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5994 struct kvm_vcpu *vcpu, u32 access,
5995 struct x86_exception *exception)
5996{
5997 void *data = val;
5998 int r = X86EMUL_CONTINUE;
5999
6000 while (bytes) {
6001 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
6002 access,
6003 exception);
6004 unsigned offset = addr & (PAGE_SIZE-1);
6005 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
6006 int ret;
6007
6008 if (gpa == UNMAPPED_GVA)
6009 return X86EMUL_PROPAGATE_FAULT;
6010 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
6011 if (ret < 0) {
6012 r = X86EMUL_IO_NEEDED;
6013 goto out;
6014 }
6015
6016 bytes -= towrite;
6017 data += towrite;
6018 addr += towrite;
6019 }
6020out:
6021 return r;
6022}
6023
6024static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
6025 unsigned int bytes, struct x86_exception *exception,
6026 bool system)
6027{
6028 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6029 u32 access = PFERR_WRITE_MASK;
6030
6031 if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
6032 access |= PFERR_USER_MASK;
6033
6034 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6035 access, exception);
6036}
6037
6038int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
6039 unsigned int bytes, struct x86_exception *exception)
6040{
6041
6042 vcpu->arch.l1tf_flush_l1d = true;
6043
6044 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
6045 PFERR_WRITE_MASK, exception);
6046}
6047EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
6048
6049int handle_ud(struct kvm_vcpu *vcpu)
6050{
6051 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
6052 int emul_type = EMULTYPE_TRAP_UD;
6053 char sig[5];
6054 struct x86_exception e;
6055
6056 if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, NULL, 0)))
6057 return 1;
6058
6059 if (force_emulation_prefix &&
6060 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
6061 sig, sizeof(sig), &e) == 0 &&
6062 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
6063 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
6064 emul_type = EMULTYPE_TRAP_UD_FORCED;
6065 }
6066
6067 return kvm_emulate_instruction(vcpu, emul_type);
6068}
6069EXPORT_SYMBOL_GPL(handle_ud);
6070
6071static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6072 gpa_t gpa, bool write)
6073{
6074
6075 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6076 return 1;
6077
6078 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
6079 trace_vcpu_match_mmio(gva, gpa, write, true);
6080 return 1;
6081 }
6082
6083 return 0;
6084}
6085
6086static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
6087 gpa_t *gpa, struct x86_exception *exception,
6088 bool write)
6089{
6090 u32 access = ((kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
6091 | (write ? PFERR_WRITE_MASK : 0);
6092
6093
6094
6095
6096
6097
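	/*
	 * Fast path: if the gva matches the cached MMIO access and the cached
	 * access bits still grant permission, reuse the cached gfn instead of
	 * walking the guest page tables.
	 */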
6098 if (vcpu_match_mmio_gva(vcpu, gva)
6099 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
6100 vcpu->arch.mmio_access, 0, access)) {
6101 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
6102 (gva & (PAGE_SIZE - 1));
6103 trace_vcpu_match_mmio(gva, *gpa, write, false);
6104 return 1;
6105 }
6106
6107 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
6108
6109 if (*gpa == UNMAPPED_GVA)
6110 return -1;
6111
6112 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
6113}
6114
6115int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
6116 const void *val, int bytes)
6117{
6118 int ret;
6119
6120 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
6121 if (ret < 0)
6122 return 0;
6123 kvm_page_track_write(vcpu, gpa, val, bytes);
6124 return 1;
6125}
6126
6127struct read_write_emulator_ops {
6128 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
6129 int bytes);
6130 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
6131 void *val, int bytes);
6132 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6133 int bytes, void *val);
6134 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
6135 void *val, int bytes);
6136 bool write;
6137};
6138
6139static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
6140{
6141 if (vcpu->mmio_read_completed) {
6142 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
6143 vcpu->mmio_fragments[0].gpa, val);
6144 vcpu->mmio_read_completed = 0;
6145 return 1;
6146 }
6147
6148 return 0;
6149}
6150
6151static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
6152 void *val, int bytes)
6153{
6154 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
6155}
6156
6157static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
6158 void *val, int bytes)
6159{
6160 return emulator_write_phys(vcpu, gpa, val, bytes);
6161}
6162
6163static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
6164{
6165 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
6166 return vcpu_mmio_write(vcpu, gpa, bytes, val);
6167}
6168
6169static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
6170 void *val, int bytes)
6171{
6172 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
6173 return X86EMUL_IO_NEEDED;
6174}
6175
6176static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
6177 void *val, int bytes)
6178{
6179 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
6180
6181 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
6182 return X86EMUL_CONTINUE;
6183}
6184
6185static const struct read_write_emulator_ops read_emultor = {
6186 .read_write_prepare = read_prepare,
6187 .read_write_emulate = read_emulate,
6188 .read_write_mmio = vcpu_mmio_read,
6189 .read_write_exit_mmio = read_exit_mmio,
6190};
6191
6192static const struct read_write_emulator_ops write_emultor = {
6193 .read_write_emulate = write_emulate,
6194 .read_write_mmio = write_mmio,
6195 .read_write_exit_mmio = write_exit_mmio,
6196 .write = true,
6197};
6198
6199static int emulator_read_write_onepage(unsigned long addr, void *val,
6200 unsigned int bytes,
6201 struct x86_exception *exception,
6202 struct kvm_vcpu *vcpu,
6203 const struct read_write_emulator_ops *ops)
6204{
6205 gpa_t gpa;
6206 int handled, ret;
6207 bool write = ops->write;
6208 struct kvm_mmio_fragment *frag;
6209 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6210
6211
6212
6213
6214
6215
6216
6217
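	/*
	 * If the fault that triggered emulation already supplied a usable
	 * GPA, reuse it and skip the GVA->GPA walk; otherwise translate the
	 * address and propagate any translation fault to the guest.
	 */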
6218 if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
6219 (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
6220 gpa = ctxt->gpa_val;
6221 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
6222 } else {
6223 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
6224 if (ret < 0)
6225 return X86EMUL_PROPAGATE_FAULT;
6226 }
6227
6228 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
6229 return X86EMUL_CONTINUE;
6230
6231
6232
6233
6234 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
6235 if (handled == bytes)
6236 return X86EMUL_CONTINUE;
6237
6238 gpa += handled;
6239 bytes -= handled;
6240 val += handled;
6241
6242 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
6243 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
6244 frag->gpa = gpa;
6245 frag->data = val;
6246 frag->len = bytes;
6247 return X86EMUL_CONTINUE;
6248}
6249
6250static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
6251 unsigned long addr,
6252 void *val, unsigned int bytes,
6253 struct x86_exception *exception,
6254 const struct read_write_emulator_ops *ops)
6255{
6256 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6257 gpa_t gpa;
6258 int rc;
6259
6260 if (ops->read_write_prepare &&
6261 ops->read_write_prepare(vcpu, val, bytes))
6262 return X86EMUL_CONTINUE;
6263
6264 vcpu->mmio_nr_fragments = 0;
6265
6266
6267 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
6268 int now;
6269
6270 now = -addr & ~PAGE_MASK;
6271 rc = emulator_read_write_onepage(addr, val, now, exception,
6272 vcpu, ops);
6273
6274 if (rc != X86EMUL_CONTINUE)
6275 return rc;
6276 addr += now;
6277 if (ctxt->mode != X86EMUL_MODE_PROT64)
6278 addr = (u32)addr;
6279 val += now;
6280 bytes -= now;
6281 }
6282
6283 rc = emulator_read_write_onepage(addr, val, bytes, exception,
6284 vcpu, ops);
6285 if (rc != X86EMUL_CONTINUE)
6286 return rc;
6287
6288 if (!vcpu->mmio_nr_fragments)
6289 return rc;
6290
6291 gpa = vcpu->mmio_fragments[0].gpa;
6292
6293 vcpu->mmio_needed = 1;
6294 vcpu->mmio_cur_fragment = 0;
6295
6296 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
6297 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
6298 vcpu->run->exit_reason = KVM_EXIT_MMIO;
6299 vcpu->run->mmio.phys_addr = gpa;
6300
6301 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
6302}
6303
6304static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
6305 unsigned long addr,
6306 void *val,
6307 unsigned int bytes,
6308 struct x86_exception *exception)
6309{
6310 return emulator_read_write(ctxt, addr, val, bytes,
6311 exception, &read_emultor);
6312}
6313
6314static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
6315 unsigned long addr,
6316 const void *val,
6317 unsigned int bytes,
6318 struct x86_exception *exception)
6319{
6320 return emulator_read_write(ctxt, addr, (void *)val, bytes,
6321 exception, &write_emultor);
6322}
6323
6324#define CMPXCHG_TYPE(t, ptr, old, new) \
6325 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
6326
6327#ifdef CONFIG_X86_64
6328# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
6329#else
6330# define CMPXCHG64(ptr, old, new) \
6331 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
6332#endif
6333
6334static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
6335 unsigned long addr,
6336 const void *old,
6337 const void *new,
6338 unsigned int bytes,
6339 struct x86_exception *exception)
6340{
6341 struct kvm_host_map map;
6342 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6343 u64 page_line_mask;
6344 gpa_t gpa;
6345 char *kaddr;
6346 bool exchanged;
6347
6348
6349 if (bytes > 8 || (bytes & (bytes - 1)))
6350 goto emul_write;
6351
6352 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
6353
6354 if (gpa == UNMAPPED_GVA ||
6355 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6356 goto emul_write;
6357
6358
6359
6360
6361
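	/*
	 * If the host has split-lock detection enabled, do not emulate an
	 * atomic operation that crosses a cache line (the locked access would
	 * raise #AC); fall back to a plain write instead.
	 */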
6362 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
6363 page_line_mask = ~(cache_line_size() - 1);
6364 else
6365 page_line_mask = PAGE_MASK;
6366
6367 if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
6368 goto emul_write;
6369
6370 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
6371 goto emul_write;
6372
6373 kaddr = map.hva + offset_in_page(gpa);
6374
6375 switch (bytes) {
6376 case 1:
6377 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
6378 break;
6379 case 2:
6380 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
6381 break;
6382 case 4:
6383 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
6384 break;
6385 case 8:
6386 exchanged = CMPXCHG64(kaddr, old, new);
6387 break;
6388 default:
6389 BUG();
6390 }
6391
6392 kvm_vcpu_unmap(vcpu, &map, true);
6393
6394 if (!exchanged)
6395 return X86EMUL_CMPXCHG_FAILED;
6396
6397 kvm_page_track_write(vcpu, gpa, new, bytes);
6398
6399 return X86EMUL_CONTINUE;
6400
6401emul_write:
6402 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
6403
6404 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
6405}
6406
6407static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
6408{
6409 int r = 0, i;
6410
6411 for (i = 0; i < vcpu->arch.pio.count; i++) {
6412 if (vcpu->arch.pio.in)
6413 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
6414 vcpu->arch.pio.size, pd);
6415 else
6416 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
6417 vcpu->arch.pio.port, vcpu->arch.pio.size,
6418 pd);
6419 if (r)
6420 break;
6421 pd += vcpu->arch.pio.size;
6422 }
6423 return r;
6424}
6425
6426static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
6427 unsigned short port, void *val,
6428 unsigned int count, bool in)
6429{
6430 vcpu->arch.pio.port = port;
6431 vcpu->arch.pio.in = in;
6432 vcpu->arch.pio.count = count;
6433 vcpu->arch.pio.size = size;
6434
6435 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
6436 vcpu->arch.pio.count = 0;
6437 return 1;
6438 }
6439
6440 vcpu->run->exit_reason = KVM_EXIT_IO;
6441 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
6442 vcpu->run->io.size = size;
6443 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
6444 vcpu->run->io.count = count;
6445 vcpu->run->io.port = port;
6446
6447 return 0;
6448}
6449
6450static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
6451 unsigned short port, void *val, unsigned int count)
6452{
6453 int ret;
6454
6455 if (vcpu->arch.pio.count)
6456 goto data_avail;
6457
6458 memset(vcpu->arch.pio_data, 0, size * count);
6459
6460 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
6461 if (ret) {
6462data_avail:
6463 memcpy(val, vcpu->arch.pio_data, size * count);
6464 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
6465 vcpu->arch.pio.count = 0;
6466 return 1;
6467 }
6468
6469 return 0;
6470}
6471
6472static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
6473 int size, unsigned short port, void *val,
6474 unsigned int count)
6475{
6476 return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
6477
6478}
6479
6480static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
6481 unsigned short port, const void *val,
6482 unsigned int count)
6483{
6484 memcpy(vcpu->arch.pio_data, val, size * count);
6485 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
6486 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
6487}
6488
6489static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
6490 int size, unsigned short port,
6491 const void *val, unsigned int count)
6492{
6493 return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
6494}
6495
6496static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
6497{
6498 return kvm_x86_ops.get_segment_base(vcpu, seg);
6499}
6500
6501static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
6502{
6503 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
6504}
6505
6506static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
6507{
6508 if (!need_emulate_wbinvd(vcpu))
6509 return X86EMUL_CONTINUE;
6510
6511 if (kvm_x86_ops.has_wbinvd_exit()) {
6512 int cpu = get_cpu();
6513
6514 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
6515 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
6516 wbinvd_ipi, NULL, 1);
6517 put_cpu();
6518 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
6519 } else
6520 wbinvd();
6521 return X86EMUL_CONTINUE;
6522}
6523
6524int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
6525{
6526 kvm_emulate_wbinvd_noskip(vcpu);
6527 return kvm_skip_emulated_instruction(vcpu);
6528}
6529EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
6530
6531
6532
6533static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
6534{
6535 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
6536}
6537
6538static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
6539 unsigned long *dest)
6540{
6541 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
6542}
6543
6544static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
6545 unsigned long value)
6546{
6547
6548 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
6549}
6550
6551static u64 mk_cr_64(u64 curr_cr, u32 new_val)
6552{
6553 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
6554}
6555
6556static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
6557{
6558 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6559 unsigned long value;
6560
6561 switch (cr) {
6562 case 0:
6563 value = kvm_read_cr0(vcpu);
6564 break;
6565 case 2:
6566 value = vcpu->arch.cr2;
6567 break;
6568 case 3:
6569 value = kvm_read_cr3(vcpu);
6570 break;
6571 case 4:
6572 value = kvm_read_cr4(vcpu);
6573 break;
6574 case 8:
6575 value = kvm_get_cr8(vcpu);
6576 break;
6577 default:
6578 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6579 return 0;
6580 }
6581
6582 return value;
6583}
6584
6585static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
6586{
6587 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6588 int res = 0;
6589
6590 switch (cr) {
6591 case 0:
6592 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
6593 break;
6594 case 2:
6595 vcpu->arch.cr2 = val;
6596 break;
6597 case 3:
6598 res = kvm_set_cr3(vcpu, val);
6599 break;
6600 case 4:
6601 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
6602 break;
6603 case 8:
6604 res = kvm_set_cr8(vcpu, val);
6605 break;
6606 default:
6607 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6608 res = -1;
6609 }
6610
6611 return res;
6612}
6613
6614static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
6615{
6616 return kvm_x86_ops.get_cpl(emul_to_vcpu(ctxt));
6617}
6618
6619static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6620{
6621 kvm_x86_ops.get_gdt(emul_to_vcpu(ctxt), dt);
6622}
6623
6624static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6625{
6626 kvm_x86_ops.get_idt(emul_to_vcpu(ctxt), dt);
6627}
6628
6629static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6630{
6631 kvm_x86_ops.set_gdt(emul_to_vcpu(ctxt), dt);
6632}
6633
6634static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6635{
6636 kvm_x86_ops.set_idt(emul_to_vcpu(ctxt), dt);
6637}
6638
6639static unsigned long emulator_get_cached_segment_base(
6640 struct x86_emulate_ctxt *ctxt, int seg)
6641{
6642 return get_segment_base(emul_to_vcpu(ctxt), seg);
6643}
6644
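/*
 * Convert a kvm_segment into the emulator's desc_struct representation.
 * An unusable (NULL) segment yields a zeroed descriptor and returns false.
 */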
6645static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
6646 struct desc_struct *desc, u32 *base3,
6647 int seg)
6648{
6649 struct kvm_segment var;
6650
6651 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
6652 *selector = var.selector;
6653
6654 if (var.unusable) {
6655 memset(desc, 0, sizeof(*desc));
6656 if (base3)
6657 *base3 = 0;
6658 return false;
6659 }
6660
6661 if (var.g)
6662 var.limit >>= 12;
6663 set_desc_limit(desc, var.limit);
6664 set_desc_base(desc, (unsigned long)var.base);
6665#ifdef CONFIG_X86_64
6666 if (base3)
6667 *base3 = var.base >> 32;
6668#endif
6669 desc->type = var.type;
6670 desc->s = var.s;
6671 desc->dpl = var.dpl;
6672 desc->p = var.present;
6673 desc->avl = var.avl;
6674 desc->l = var.l;
6675 desc->d = var.db;
6676 desc->g = var.g;
6677
6678 return true;
6679}
6680
6681static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
6682 struct desc_struct *desc, u32 base3,
6683 int seg)
6684{
6685 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6686 struct kvm_segment var;
6687
6688 var.selector = selector;
6689 var.base = get_desc_base(desc);
6690#ifdef CONFIG_X86_64
6691 var.base |= ((u64)base3) << 32;
6692#endif
6693 var.limit = get_desc_limit(desc);
6694 if (desc->g)
6695 var.limit = (var.limit << 12) | 0xfff;
6696 var.type = desc->type;
6697 var.dpl = desc->dpl;
6698 var.db = desc->d;
6699 var.s = desc->s;
6700 var.l = desc->l;
6701 var.g = desc->g;
6702 var.avl = desc->avl;
6703 var.present = desc->p;
6704 var.unusable = !var.present;
6705 var.padding = 0;
6706
6707 kvm_set_segment(vcpu, &var, seg);
6708 return;
6709}
6710
6711static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
6712 u32 msr_index, u64 *pdata)
6713{
6714 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6715 int r;
6716
6717 r = kvm_get_msr(vcpu, msr_index, pdata);
6718
6719 if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
 /* Bounce the MSR read to userspace for handling. */
6721 return X86EMUL_IO_NEEDED;
6722 }
6723
6724 return r;
6725}
6726
6727static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
6728 u32 msr_index, u64 data)
6729{
6730 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6731 int r;
6732
6733 r = kvm_set_msr(vcpu, msr_index, data);
6734
6735 if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
 /* Bounce the MSR write to userspace for handling. */
6737 return X86EMUL_IO_NEEDED;
6738 }
6739
6740 return r;
6741}
6742
6743static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
6744{
6745 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6746
6747 return vcpu->arch.smbase;
6748}
6749
6750static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
6751{
6752 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6753
6754 vcpu->arch.smbase = smbase;
6755}
6756
6757static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
6758 u32 pmc)
6759{
6760 return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
6761}
6762
6763static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
6764 u32 pmc, u64 *pdata)
6765{
6766 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
6767}
6768
6769static void emulator_halt(struct x86_emulate_ctxt *ctxt)
6770{
6771 emul_to_vcpu(ctxt)->arch.halt_request = 1;
6772}
6773
6774static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
6775 struct x86_instruction_info *info,
6776 enum x86_intercept_stage stage)
6777{
6778 return kvm_x86_ops.check_intercept(emul_to_vcpu(ctxt), info, stage,
6779 &ctxt->exception);
6780}
6781
6782static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
6783 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
6784 bool exact_only)
6785{
6786 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
6787}
6788
6789static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
6790{
6791 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
6792}
6793
6794static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
6795{
6796 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
6797}
6798
6799static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
6800{
6801 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
6802}
6803
6804static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
6805{
6806 return kvm_register_read(emul_to_vcpu(ctxt), reg);
6807}
6808
6809static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
6810{
6811 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
6812}
6813
6814static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
6815{
6816 kvm_x86_ops.set_nmi_mask(emul_to_vcpu(ctxt), masked);
6817}
6818
6819static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
6820{
6821 return emul_to_vcpu(ctxt)->arch.hflags;
6822}
6823
6824static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
6825{
6826 emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
6827}
6828
6829static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
6830 const char *smstate)
6831{
6832 return kvm_x86_ops.pre_leave_smm(emul_to_vcpu(ctxt), smstate);
6833}
6834
6835static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
6836{
6837 kvm_smm_changed(emul_to_vcpu(ctxt));
6838}
6839
6840static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
6841{
6842 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
6843}
6844
6845static const struct x86_emulate_ops emulate_ops = {
6846 .read_gpr = emulator_read_gpr,
6847 .write_gpr = emulator_write_gpr,
6848 .read_std = emulator_read_std,
6849 .write_std = emulator_write_std,
6850 .read_phys = kvm_read_guest_phys_system,
6851 .fetch = kvm_fetch_guest_virt,
6852 .read_emulated = emulator_read_emulated,
6853 .write_emulated = emulator_write_emulated,
6854 .cmpxchg_emulated = emulator_cmpxchg_emulated,
6855 .invlpg = emulator_invlpg,
6856 .pio_in_emulated = emulator_pio_in_emulated,
6857 .pio_out_emulated = emulator_pio_out_emulated,
6858 .get_segment = emulator_get_segment,
6859 .set_segment = emulator_set_segment,
6860 .get_cached_segment_base = emulator_get_cached_segment_base,
6861 .get_gdt = emulator_get_gdt,
6862 .get_idt = emulator_get_idt,
6863 .set_gdt = emulator_set_gdt,
6864 .set_idt = emulator_set_idt,
6865 .get_cr = emulator_get_cr,
6866 .set_cr = emulator_set_cr,
6867 .cpl = emulator_get_cpl,
6868 .get_dr = emulator_get_dr,
6869 .set_dr = emulator_set_dr,
6870 .get_smbase = emulator_get_smbase,
6871 .set_smbase = emulator_set_smbase,
6872 .set_msr = emulator_set_msr,
6873 .get_msr = emulator_get_msr,
6874 .check_pmc = emulator_check_pmc,
6875 .read_pmc = emulator_read_pmc,
6876 .halt = emulator_halt,
6877 .wbinvd = emulator_wbinvd,
6878 .fix_hypercall = emulator_fix_hypercall,
6879 .intercept = emulator_intercept,
6880 .get_cpuid = emulator_get_cpuid,
6881 .guest_has_long_mode = emulator_guest_has_long_mode,
6882 .guest_has_movbe = emulator_guest_has_movbe,
6883 .guest_has_fxsr = emulator_guest_has_fxsr,
6884 .set_nmi_mask = emulator_set_nmi_mask,
6885 .get_hflags = emulator_get_hflags,
6886 .set_hflags = emulator_set_hflags,
6887 .pre_leave_smm = emulator_pre_leave_smm,
6888 .post_leave_smm = emulator_post_leave_smm,
6889 .set_xcr = emulator_set_xcr,
6890};
6891
6892static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
6893{
6894 u32 int_shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
6895
 /*
  * The STI and MOV SS interrupt shadows only block events for the single
  * instruction that follows.  If the shadow was already active when this
  * instruction started, it must not be re-armed for the next one, so drop
  * the requested mask in that case.
  */
6902 if (int_shadow & mask)
6903 mask = 0;
6904 if (unlikely(int_shadow || mask)) {
6905 kvm_x86_ops.set_interrupt_shadow(vcpu, mask);
6906 if (!mask)
6907 kvm_make_request(KVM_REQ_EVENT, vcpu);
6908 }
6909}
6910
6911static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
6912{
6913 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6914 if (ctxt->exception.vector == PF_VECTOR)
6915 return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
6916
6917 if (ctxt->exception.error_code_valid)
6918 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
6919 ctxt->exception.error_code);
6920 else
6921 kvm_queue_exception(vcpu, ctxt->exception.vector);
6922 return false;
6923}
6924
6925static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
6926{
6927 struct x86_emulate_ctxt *ctxt;
6928
6929 ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
6930 if (!ctxt) {
6931 pr_err("kvm: failed to allocate vcpu's emulator\n");
6932 return NULL;
6933 }
6934
6935 ctxt->vcpu = vcpu;
6936 ctxt->ops = &emulate_ops;
6937 vcpu->arch.emulate_ctxt = ctxt;
6938
6939 return ctxt;
6940}
6941
6942static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
6943{
6944 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6945 int cs_db, cs_l;
6946
6947 kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
6948
6949 ctxt->gpa_available = false;
6950 ctxt->eflags = kvm_get_rflags(vcpu);
6951 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
6952
6953 ctxt->eip = kvm_rip_read(vcpu);
6954 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
6955 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
6956 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
6957 cs_db ? X86EMUL_MODE_PROT32 :
6958 X86EMUL_MODE_PROT16;
6959 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
6960 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
6961 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
6962
6963 init_decode_cache(ctxt);
6964 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6965}
6966
6967void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
6968{
6969 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6970 int ret;
6971
6972 init_emulate_ctxt(vcpu);
6973
6974 ctxt->op_bytes = 2;
6975 ctxt->ad_bytes = 2;
6976 ctxt->_eip = ctxt->eip + inc_eip;
6977 ret = emulate_int_real(ctxt, irq);
6978
6979 if (ret != X86EMUL_CONTINUE) {
6980 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
6981 } else {
6982 ctxt->eip = ctxt->_eip;
6983 kvm_rip_write(vcpu, ctxt->eip);
6984 kvm_set_rflags(vcpu, ctxt->eflags);
6985 }
6986}
6987EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
6988
6989static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
6990{
6991 ++vcpu->stat.insn_emulation_fail;
6992 trace_kvm_emulate_insn_failed(vcpu);
6993
6994 if (emulation_type & EMULTYPE_VMWARE_GP) {
6995 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6996 return 1;
6997 }
6998
6999 if (emulation_type & EMULTYPE_SKIP) {
7000 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7001 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7002 vcpu->run->internal.ndata = 0;
7003 return 0;
7004 }
7005
7006 kvm_queue_exception(vcpu, UD_VECTOR);
7007
7008 if (!is_guest_mode(vcpu) && kvm_x86_ops.get_cpl(vcpu) == 0) {
7009 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
7010 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
7011 vcpu->run->internal.ndata = 0;
7012 return 0;
7013 }
7014
7015 return 1;
7016}
7017
7018static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
7019 bool write_fault_to_shadow_pgtable,
7020 int emulation_type)
7021{
7022 gpa_t gpa = cr2_or_gpa;
7023 kvm_pfn_t pfn;
7024
7025 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7026 return false;
7027
7028 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7029 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7030 return false;
7031
7032 if (!vcpu->arch.mmu->direct_map) {
 /*
  * With shadow paging the faulting address is a GVA; translate it to a
  * GPA so the backing page can be examined.
  */
7037 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
7038
 /*
  * If the GVA isn't mapped by the guest's page tables, re-enter the guest
  * and let it take the page fault itself.
  */
7043 if (gpa == UNMAPPED_GVA)
7044 return true;
7045 }
7046
 /*
  * Look the gfn up with a write access so a fault on read-only host memory
  * is caught here; retrying such an instruction cannot succeed and would
  * loop forever (retry -> write #PF -> emulation failure -> retry).
  */
7053 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
7054
 /*
  * If the gfn has no usable backing pfn the failure cannot be fixed by
  * unprotecting pages; report the emulation failure to userspace.
  */
7059 if (is_error_noslot_pfn(pfn))
7060 return false;
7061
7062 kvm_release_pfn_clean(pfn);
7063
 /* With TDP, unprotecting the gfn below makes the retry worthwhile. */
7065 if (vcpu->arch.mmu->direct_map) {
7066 unsigned int indirect_shadow_pages;
7067
7068 spin_lock(&vcpu->kvm->mmu_lock);
7069 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
7070 spin_unlock(&vcpu->kvm->mmu_lock);
7071
7072 if (indirect_shadow_pages)
7073 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7074
7075 return true;
7076 }
7077
 /*
  * The emulation failed while the guest was writing its own page tables:
  * unprotect the shadowed gfn and re-enter the guest so the CPU can
  * execute the instruction natively.
  */
7083 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7084
 /*
  * If the write faulted on the page tables that map the instruction
  * itself, unprotecting cannot make progress, so give up on re-execution
  * and let the caller report the failure.
  */
7090 return !write_fault_to_shadow_pgtable;
7091}
7092
7093static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
7094 gpa_t cr2_or_gpa, int emulation_type)
7095{
7096 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7097 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
7098
7099 last_retry_eip = vcpu->arch.last_retry_eip;
7100 last_retry_addr = vcpu->arch.last_retry_addr;
7101
 /*
  * A #PF during emulation of an instruction that does not itself write
  * page tables usually means the VM-exit was caused by write-protected
  * shadow pages; the shadow page can then be zapped and the instruction
  * retried directly.  The last retried RIP and fault address are cached
  * so that hitting the same pair twice breaks out of a potential
  * infinite retry loop.
  */
7115 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
7116
7117 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
7118 return false;
7119
7120 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
7121 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
7122 return false;
7123
7124 if (x86_page_table_writing_insn(ctxt))
7125 return false;
7126
7127 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
7128 return false;
7129
7130 vcpu->arch.last_retry_eip = ctxt->eip;
7131 vcpu->arch.last_retry_addr = cr2_or_gpa;
7132
7133 if (!vcpu->arch.mmu->direct_map)
7134 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
7135
7136 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
7137
7138 return true;
7139}
7140
7141static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
7142static int complete_emulated_pio(struct kvm_vcpu *vcpu);
7143
7144static void kvm_smm_changed(struct kvm_vcpu *vcpu)
7145{
7146 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
 /* Trace the point at which the vCPU leaves SMM. */
7148 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);

 /* Process a latched INIT or SMI, if any. */
7151 kvm_make_request(KVM_REQ_EVENT, vcpu);
7152 }
7153
7154 kvm_mmu_reset_context(vcpu);
7155}
7156
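/*
 * Return a DR6-style bitmask of the breakpoints in @db that are enabled in
 * @dr7, match the R/W @type and hit @addr.  DR7 holds two enable bits per
 * breakpoint in its low word and a 4-bit R/W+LEN field per breakpoint
 * starting at bit 16.
 */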
7157static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
7158 unsigned long *db)
7159{
7160 u32 dr6 = 0;
7161 int i;
7162 u32 enable, rwlen;
7163
7164 enable = dr7;
7165 rwlen = dr7 >> 16;
7166 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
7167 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
7168 dr6 |= (1 << i);
7169 return dr6;
7170}
7171
7172static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
7173{
7174 struct kvm_run *kvm_run = vcpu->run;
7175
7176 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
7177 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
7178 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
7179 kvm_run->debug.arch.exception = DB_VECTOR;
7180 kvm_run->exit_reason = KVM_EXIT_DEBUG;
7181 return 0;
7182 }
7183 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
7184 return 1;
7185}
7186
7187int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
7188{
7189 unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
7190 int r;
7191
7192 r = kvm_x86_ops.skip_emulated_instruction(vcpu);
7193 if (unlikely(!r))
7194 return 0;
7195
 /*
  * rflags was read before the instruction was skipped, so it still holds
  * the "raw" value.  Checking TF on that value is correct even when the
  * skipped instruction itself set TF, because the CPU does not raise #DB
  * after the instruction that sets the flag.
  */
7204 if (unlikely(rflags & X86_EFLAGS_TF))
7205 r = kvm_vcpu_do_singlestep(vcpu);
7206 return r;
7207}
7208EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
7209
7210static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
7211{
7212 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
7213 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
7214 struct kvm_run *kvm_run = vcpu->run;
7215 unsigned long eip = kvm_get_linear_rip(vcpu);
7216 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
7217 vcpu->arch.guest_debug_dr7,
7218 vcpu->arch.eff_db);
7219
7220 if (dr6 != 0) {
7221 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
7222 kvm_run->debug.arch.pc = eip;
7223 kvm_run->debug.arch.exception = DB_VECTOR;
7224 kvm_run->exit_reason = KVM_EXIT_DEBUG;
7225 *r = 0;
7226 return true;
7227 }
7228 }
7229
7230 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
7231 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
7232 unsigned long eip = kvm_get_linear_rip(vcpu);
7233 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
7234 vcpu->arch.dr7,
7235 vcpu->arch.db);
7236
7237 if (dr6 != 0) {
7238 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
7239 *r = 1;
7240 return true;
7241 }
7242 }
7243
7244 return false;
7245}
7246
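/*
 * The VMware backdoor is driven through the port I/O instructions IN, OUT,
 * INS and OUTS (0xe4-0xe7, 0xec-0xef, 0x6c-0x6f) and through RDPMC
 * (0x0f 0x33); only these opcodes are eligible for EMULTYPE_VMWARE_GP
 * emulation.
 */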
7247static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
7248{
7249 switch (ctxt->opcode_len) {
7250 case 1:
7251 switch (ctxt->b) {
7252 case 0xe4:
7253 case 0xe5:
7254 case 0xec:
7255 case 0xed:
7256 case 0xe6:
7257 case 0xe7:
7258 case 0xee:
7259 case 0xef:
7260 case 0x6c:
7261 case 0x6d:
7262 case 0x6e:
7263 case 0x6f:
7264 return true;
7265 }
7266 break;
7267 case 2:
7268 switch (ctxt->b) {
7269 case 0x33:
7270 return true;
7271 }
7272 break;
7273 }
7274
7275 return false;
7276}
7277
7278int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
7279 int emulation_type, void *insn, int insn_len)
7280{
7281 int r;
7282 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
7283 bool writeback = true;
7284 bool write_fault_to_spt;
7285
7286 if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, insn, insn_len)))
7287 return 1;
7288
7289 vcpu->arch.l1tf_flush_l1d = true;
7290
 /*
  * Snapshot and clear write_fault_to_shadow_pgtable up front so that a
  * stale value is never reused by a later emulation.
  */
7295 write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
7296 vcpu->arch.write_fault_to_shadow_pgtable = false;
7297 kvm_clear_exception_queue(vcpu);
7298
7299 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
7300 init_emulate_ctxt(vcpu);
7301
 /*
  * Check for code breakpoints before decoding.  A breakpoint exit does
  * not set complete_userspace_io, so KVM re-enters on the same
  * instruction afterwards; data watchpoints are not handled here.
  */
7308 if (!(emulation_type & EMULTYPE_SKIP) &&
7309 kvm_vcpu_check_breakpoint(vcpu, &r))
7310 return r;
7311
7312 ctxt->interruptibility = 0;
7313 ctxt->have_exception = false;
7314 ctxt->exception.vector = -1;
7315 ctxt->perm_ok = false;
7316
7317 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
7318
7319 r = x86_decode_insn(ctxt, insn, insn_len);
7320
7321 trace_kvm_emulate_insn_start(vcpu);
7322 ++vcpu->stat.insn_emulation;
7323 if (r != EMULATION_OK) {
7324 if ((emulation_type & EMULTYPE_TRAP_UD) ||
7325 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
7326 kvm_queue_exception(vcpu, UD_VECTOR);
7327 return 1;
7328 }
7329 if (reexecute_instruction(vcpu, cr2_or_gpa,
7330 write_fault_to_spt,
7331 emulation_type))
7332 return 1;
7333 if (ctxt->have_exception) {
 /*
  * A decode failure must not itself queue a #UD (that case is handled as
  * an emulation failure), and trap-like exceptions are never expected
  * during decode.
  */
7338 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
7339 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
7340 inject_emulated_exception(vcpu);
7341 return 1;
7342 }
7343 return handle_emulation_failure(vcpu, emulation_type);
7344 }
7345 }
7346
7347 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
7348 !is_vmware_backdoor_opcode(ctxt)) {
7349 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
7350 return 1;
7351 }
7352
 /*
  * EMULTYPE_SKIP is intended only for the vendor callbacks behind
  * kvm_skip_emulated_instruction(); the caller remains responsible for
  * updating interruptibility state and injecting single-step #DBs.
  */
7358 if (emulation_type & EMULTYPE_SKIP) {
7359 kvm_rip_write(vcpu, ctxt->_eip);
7360 if (ctxt->eflags & X86_EFLAGS_RF)
7361 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
7362 return 1;
7363 }
7364
7365 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
7366 return 1;
7367
 /*
  * Re-read the GPRs from the vCPU if they were modified outside the
  * emulator, e.g. by the VMware backdoor during an I/O operation.
  */
7370 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
7371 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
7372 emulator_invalidate_register_cache(ctxt);
7373 }
7374
7375restart:
7376 if (emulation_type & EMULTYPE_PF) {
 /* Save the faulting address; it may be injected into the guest as a #PF. */
7378 ctxt->exception.address = cr2_or_gpa;

 /* With TDP, cr2_or_gpa is the GPA of the faulting guest access. */
7381 if (vcpu->arch.mmu->direct_map) {
7382 ctxt->gpa_available = true;
7383 ctxt->gpa_val = cr2_or_gpa;
7384 }
7385 } else {
 /* Sanitize the address out of an abundance of caution. */
7387 ctxt->exception.address = 0;
7388 }
7389
7390 r = x86_emulate_insn(ctxt);
7391
7392 if (r == EMULATION_INTERCEPTED)
7393 return 1;
7394
7395 if (r == EMULATION_FAILED) {
7396 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
7397 emulation_type))
7398 return 1;
7399
7400 return handle_emulation_failure(vcpu, emulation_type);
7401 }
7402
7403 if (ctxt->have_exception) {
7404 r = 1;
7405 if (inject_emulated_exception(vcpu))
7406 return r;
7407 } else if (vcpu->arch.pio.count) {
7408 if (!vcpu->arch.pio.in) {
7409
7410 vcpu->arch.pio.count = 0;
7411 } else {
7412 writeback = false;
7413 vcpu->arch.complete_userspace_io = complete_emulated_pio;
7414 }
7415 r = 0;
7416 } else if (vcpu->mmio_needed) {
7417 ++vcpu->stat.mmio_exits;
7418
7419 if (!vcpu->mmio_is_write)
7420 writeback = false;
7421 r = 0;
7422 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
7423 } else if (r == EMULATION_RESTART)
7424 goto restart;
7425 else
7426 r = 1;
7427
7428 if (writeback) {
7429 unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
7430 toggle_interruptibility(vcpu, ctxt->interruptibility);
7431 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7432 if (!ctxt->have_exception ||
7433 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
7434 kvm_rip_write(vcpu, ctxt->eip);
7435 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
7436 r = kvm_vcpu_do_singlestep(vcpu);
7437 if (kvm_x86_ops.update_emulated_instruction)
7438 kvm_x86_ops.update_emulated_instruction(vcpu);
7439 __kvm_set_rflags(vcpu, ctxt->eflags);
7440 }
 /*
  * After an STI the interrupt shadow is set, so KVM_REQ_EVENT would be a
  * no-op here and is requested again when the shadow expires; the check
  * is still needed because POPF has no interrupt shadow.
  */
7448 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
7449 kvm_make_request(KVM_REQ_EVENT, vcpu);
7450 } else
7451 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
7452
7453 return r;
7454}
7455
7456int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
7457{
7458 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
7459}
7460EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
7461
7462int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
7463 void *insn, int insn_len)
7464{
7465 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
7466}
7467EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
7468
7469static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
7470{
7471 vcpu->arch.pio.count = 0;
7472 return 1;
7473}
7474
7475static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
7476{
7477 vcpu->arch.pio.count = 0;
7478
7479 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
7480 return 1;
7481
7482 return kvm_skip_emulated_instruction(vcpu);
7483}
7484
7485static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
7486 unsigned short port)
7487{
7488 unsigned long val = kvm_rax_read(vcpu);
7489 int ret = emulator_pio_out(vcpu, size, port, &val, 1);
7490
7491 if (ret)
7492 return ret;
7493
 /*
  * Work around userspace that relies on the old KVM behaviour of %rip
  * being incremented before exiting to userspace for an "OUT 0x7e".
  */
7498 if (port == 0x7e &&
7499 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
7500 vcpu->arch.complete_userspace_io =
7501 complete_fast_pio_out_port_0x7e;
7502 kvm_skip_emulated_instruction(vcpu);
7503 } else {
7504 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
7505 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
7506 }
7507 return 0;
7508}
7509
7510static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
7511{
7512 unsigned long val;
7513
 /* Exactly one PIO operation should be outstanding at this point. */
7515 BUG_ON(vcpu->arch.pio.count != 1);
7516
7517 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
7518 vcpu->arch.pio.count = 0;
7519 return 1;
7520 }
7521
 /* For sizes below 4 bytes merge into RAX, otherwise zero-extend. */
7523 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
7524
 /*
  * pio.count is still 1, so emulator_pio_in() just copies the buffered
  * data out of pio_data and does the tracing.
  */
7529 emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
7530 kvm_rax_write(vcpu, val);
7531
7532 return kvm_skip_emulated_instruction(vcpu);
7533}
7534
7535static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
7536 unsigned short port)
7537{
7538 unsigned long val;
7539 int ret;
7540
 /* For sizes below 4 bytes merge into RAX, otherwise zero-extend. */
7542 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
7543
7544 ret = emulator_pio_in(vcpu, size, port, &val, 1);
7545 if (ret) {
7546 kvm_rax_write(vcpu, val);
7547 return ret;
7548 }
7549
7550 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
7551 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
7552
7553 return 0;
7554}
7555
7556int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
7557{
7558 int ret;
7559
7560 if (in)
7561 ret = kvm_fast_pio_in(vcpu, size, port);
7562 else
7563 ret = kvm_fast_pio_out(vcpu, size, port);
7564 return ret && kvm_skip_emulated_instruction(vcpu);
7565}
7566EXPORT_SYMBOL_GPL(kvm_fast_pio);
7567
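/* CPU hotplug: forget the cached TSC frequency when a CPU goes offline. */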
7568static int kvmclock_cpu_down_prep(unsigned int cpu)
7569{
7570 __this_cpu_write(cpu_tsc_khz, 0);
7571 return 0;
7572}
7573
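/*
 * Refresh this CPU's cached TSC frequency: prefer the value from a cpufreq
 * transition if one was passed in, otherwise query cpufreq (unless the TSC
 * is constant), and finally fall back to the boot-time tsc_khz.
 */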
7574static void tsc_khz_changed(void *data)
7575{
7576 struct cpufreq_freqs *freq = data;
7577 unsigned long khz = 0;
7578
7579 if (data)
7580 khz = freq->new;
7581 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7582 khz = cpufreq_quick_get(raw_smp_processor_id());
7583 if (!khz)
7584 khz = tsc_khz;
7585 __this_cpu_write(cpu_tsc_khz, khz);
7586}
7587
7588#ifdef CONFIG_X86_64
7589static void kvm_hyperv_tsc_notifier(void)
7590{
7591 struct kvm *kvm;
7592 struct kvm_vcpu *vcpu;
7593 int cpu;
7594
7595 mutex_lock(&kvm_lock);
7596 list_for_each_entry(kvm, &vm_list, vm_list)
7597 kvm_make_mclock_inprogress_request(kvm);
7598
7599 hyperv_stop_tsc_emulation();
7600
 /* The TSC frequency always matches the host when running on Hyper-V. */
7602 for_each_present_cpu(cpu)
7603 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
7604 kvm_max_guest_tsc_khz = tsc_khz;
7605
7606 list_for_each_entry(kvm, &vm_list, vm_list) {
7607 struct kvm_arch *ka = &kvm->arch;
7608
7609 spin_lock(&ka->pvclock_gtod_sync_lock);
7610
7611 pvclock_update_vm_gtod_copy(kvm);
7612
7613 kvm_for_each_vcpu(cpu, vcpu, kvm)
7614 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7615
7616 kvm_for_each_vcpu(cpu, vcpu, kvm)
7617 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
7618
7619 spin_unlock(&ka->pvclock_gtod_sync_lock);
7620 }
7621 mutex_unlock(&kvm_lock);
7622}
7623#endif
7624
7625static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
7626{
7627 struct kvm *kvm;
7628 struct kvm_vcpu *vcpu;
7629 int i, send_ipi = 0;
7630
 /*
  * Guests may briefly run on a slowing clock as long as they are told
  * about it afterwards, or on an accelerating clock as long as they are
  * told beforehand, so guest time never appears to go backwards.
  *
  * The per-CPU tsc_khz value cannot be updated atomically from this
  * notifier (which may run on any CPU), so every update is done through a
  * synchronous IPI to the affected CPU via tsc_khz_changed(), and each
  * vCPU that last ran there is asked to recompute its kvmclock parameters
  * before it next enters the guest.
  */
7670 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7671
7672 mutex_lock(&kvm_lock);
7673 list_for_each_entry(kvm, &vm_list, vm_list) {
7674 kvm_for_each_vcpu(i, vcpu, kvm) {
7675 if (vcpu->cpu != cpu)
7676 continue;
7677 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7678 if (vcpu->cpu != raw_smp_processor_id())
7679 send_ipi = 1;
7680 }
7681 }
7682 mutex_unlock(&kvm_lock);
7683
7684 if (freq->old < freq->new && send_ipi) {
 /*
  * The frequency went up: the affected vCPUs must not keep running with
  * the stale, slower kvmclock parameters or guest time could appear to
  * jump backwards.  If a vCPU is on another CPU, possibly in guest mode,
  * the IPI also kicks it out so the update is picked up on the next
  * guest entry.
  */
7697 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7698 }
7699}
7700
7701static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
7702 void *data)
7703{
7704 struct cpufreq_freqs *freq = data;
7705 int cpu;
7706
7707 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
7708 return 0;
7709 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
7710 return 0;
7711
7712 for_each_cpu(cpu, freq->policy->cpus)
7713 __kvmclock_cpufreq_notifier(freq, cpu);
7714
7715 return 0;
7716}
7717
7718static struct notifier_block kvmclock_cpufreq_notifier_block = {
7719 .notifier_call = kvmclock_cpufreq_notifier
7720};
7721
7722static int kvmclock_cpu_online(unsigned int cpu)
7723{
7724 tsc_khz_changed(NULL);
7725 return 0;
7726}
7727
7728static void kvm_timer_init(void)
7729{
7730 max_tsc_khz = tsc_khz;
7731
7732 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
7733#ifdef CONFIG_CPU_FREQ
7734 struct cpufreq_policy *policy;
7735 int cpu;
7736
7737 cpu = get_cpu();
7738 policy = cpufreq_cpu_get(cpu);
7739 if (policy) {
7740 if (policy->cpuinfo.max_freq)
7741 max_tsc_khz = policy->cpuinfo.max_freq;
7742 cpufreq_cpu_put(policy);
7743 }
7744 put_cpu();
7745#endif
7746 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
7747 CPUFREQ_TRANSITION_NOTIFIER);
7748 }
7749
7750 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
7751 kvmclock_cpu_online, kvmclock_cpu_down_prep);
7752}
7753
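/*
 * current_vcpu records which vCPU is running on each physical CPU; the perf
 * callbacks below use it to attribute PMI samples to guest context.
 */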
7754DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
7755EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
7756
7757int kvm_is_in_guest(void)
7758{
7759 return __this_cpu_read(current_vcpu) != NULL;
7760}
7761
7762static int kvm_is_user_mode(void)
7763{
7764 int user_mode = 3;
7765
7766 if (__this_cpu_read(current_vcpu))
7767 user_mode = kvm_x86_ops.get_cpl(__this_cpu_read(current_vcpu));
7768
7769 return user_mode != 0;
7770}
7771
7772static unsigned long kvm_get_guest_ip(void)
7773{
7774 unsigned long ip = 0;
7775
7776 if (__this_cpu_read(current_vcpu))
7777 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
7778
7779 return ip;
7780}
7781
7782static void kvm_handle_intel_pt_intr(void)
7783{
7784 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
7785
7786 kvm_make_request(KVM_REQ_PMI, vcpu);
7787 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
7788 (unsigned long *)&vcpu->arch.pmu.global_status);
7789}
7790
7791static struct perf_guest_info_callbacks kvm_guest_cbs = {
7792 .is_in_guest = kvm_is_in_guest,
7793 .is_user_mode = kvm_is_user_mode,
7794 .get_guest_ip = kvm_get_guest_ip,
7795 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
7796};
7797
7798#ifdef CONFIG_X86_64
7799static void pvclock_gtod_update_fn(struct work_struct *work)
7800{
7801 struct kvm *kvm;
7802
7803 struct kvm_vcpu *vcpu;
7804 int i;
7805
7806 mutex_lock(&kvm_lock);
7807 list_for_each_entry(kvm, &vm_list, vm_list)
7808 kvm_for_each_vcpu(i, vcpu, kvm)
7809 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7810 atomic_set(&kvm_guest_has_master_clock, 0);
7811 mutex_unlock(&kvm_lock);
7812}
7813
7814static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
7815
/*
 * Notification handler for updates to the timekeeper's pvclock/gtod data.
 */
7819static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
7820 void *priv)
7821{
7822 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
7823 struct timekeeper *tk = priv;
7824
7825 update_pvclock_gtod(tk);
7826
 /*
  * Disable the master clock if the host stops using a TSC-based
  * clocksource.
  */
7830 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
7831 atomic_read(&kvm_guest_has_master_clock) != 0)
7832 queue_work(system_long_wq, &pvclock_gtod_work);
7833
7834 return 0;
7835}
7836
7837static struct notifier_block pvclock_gtod_notifier = {
7838 .notifier_call = pvclock_gtod_notify,
7839};
7840#endif
7841
7842int kvm_arch_init(void *opaque)
7843{
7844 struct kvm_x86_init_ops *ops = opaque;
7845 int r;
7846
7847 if (kvm_x86_ops.hardware_enable) {
7848 printk(KERN_ERR "kvm: already loaded the other module\n");
7849 r = -EEXIST;
7850 goto out;
7851 }
7852
7853 if (!ops->cpu_has_kvm_support()) {
7854 pr_err_ratelimited("kvm: no hardware support\n");
7855 r = -EOPNOTSUPP;
7856 goto out;
7857 }
7858 if (ops->disabled_by_bios()) {
7859 pr_err_ratelimited("kvm: disabled by bios\n");
7860 r = -EOPNOTSUPP;
7861 goto out;
7862 }
7863
 /*
  * KVM assumes the guest has an FPU with FXSAVE/FXRSTOR support (for
  * example, KVM_GET_FPU exposes the state as an fxregs_state struct), so
  * refuse to load on hosts without those features.
  */
7869 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
7870 printk(KERN_ERR "kvm: inadequate fpu\n");
7871 r = -EOPNOTSUPP;
7872 goto out;
7873 }
7874
7875 r = -ENOMEM;
7876 x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
7877 __alignof__(struct fpu), SLAB_ACCOUNT,
7878 NULL);
7879 if (!x86_fpu_cache) {
7880 printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
7881 goto out;
7882 }
7883
7884 x86_emulator_cache = kvm_alloc_emulator_cache();
7885 if (!x86_emulator_cache) {
7886 pr_err("kvm: failed to allocate cache for x86 emulator\n");
7887 goto out_free_x86_fpu_cache;
7888 }
7889
7890 user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
7891 if (!user_return_msrs) {
7892 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
7893 goto out_free_x86_emulator_cache;
7894 }
7895
7896 r = kvm_mmu_module_init();
7897 if (r)
7898 goto out_free_percpu;
7899
7900 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
7901 PT_DIRTY_MASK, PT64_NX_MASK, 0,
7902 PT_PRESENT_MASK, 0, sme_me_mask);
7903 kvm_timer_init();
7904
7905 perf_register_guest_info_callbacks(&kvm_guest_cbs);
7906
7907 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
7908 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
7909 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
7910 }
7911
7912 kvm_lapic_init();
7913 if (pi_inject_timer == -1)
7914 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
7915#ifdef CONFIG_X86_64
7916 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
7917
7918 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7919 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
7920#endif
7921
7922 return 0;
7923
7924out_free_percpu:
7925 free_percpu(user_return_msrs);
7926out_free_x86_emulator_cache:
7927 kmem_cache_destroy(x86_emulator_cache);
7928out_free_x86_fpu_cache:
7929 kmem_cache_destroy(x86_fpu_cache);
7930out:
7931 return r;
7932}
7933
7934void kvm_arch_exit(void)
7935{
7936#ifdef CONFIG_X86_64
7937 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7938 clear_hv_tscchange_cb();
7939#endif
7940 kvm_lapic_exit();
7941 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
7942
7943 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7944 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
7945 CPUFREQ_TRANSITION_NOTIFIER);
7946 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
7947#ifdef CONFIG_X86_64
7948 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
7949#endif
7950 kvm_x86_ops.hardware_enable = NULL;
7951 kvm_mmu_module_exit();
7952 free_percpu(user_return_msrs);
7953 kmem_cache_destroy(x86_fpu_cache);
7954}
7955
7956int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
7957{
7958 ++vcpu->stat.halt_exits;
7959 if (lapic_in_kernel(vcpu)) {
7960 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
7961 return 1;
7962 } else {
7963 vcpu->run->exit_reason = KVM_EXIT_HLT;
7964 return 0;
7965 }
7966}
7967EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
7968
7969int kvm_emulate_halt(struct kvm_vcpu *vcpu)
7970{
7971 int ret = kvm_skip_emulated_instruction(vcpu);
 /*
  * Note: if the skip above raised a single-step KVM_EXIT_DEBUG, a 0
  * return from kvm_vcpu_halt() below will override it.
  */
7976 return kvm_vcpu_halt(vcpu) && ret;
7977}
7978EXPORT_SYMBOL_GPL(kvm_emulate_halt);
7979
7980#ifdef CONFIG_X86_64
7981static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
7982 unsigned long clock_type)
7983{
7984 struct kvm_clock_pairing clock_pairing;
7985 struct timespec64 ts;
7986 u64 cycle;
7987 int ret;
7988
7989 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
7990 return -KVM_EOPNOTSUPP;
7991
7992 if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
7993 return -KVM_EOPNOTSUPP;
7994
7995 clock_pairing.sec = ts.tv_sec;
7996 clock_pairing.nsec = ts.tv_nsec;
7997 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
7998 clock_pairing.flags = 0;
7999 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
8000
8001 ret = 0;
8002 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
8003 sizeof(struct kvm_clock_pairing)))
8004 ret = -KVM_EFAULT;
8005
8006 return ret;
8007}
8008#endif
8009
/*
 * Kick (wake up) the vCPU with the given APIC ID by sending it an
 * APIC_DM_REMRD IPI; this backs the KVM_HC_KICK_CPU hypercall.
 */
8015static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
8016{
8017 struct kvm_lapic_irq lapic_irq;
8018
8019 lapic_irq.shorthand = APIC_DEST_NOSHORT;
8020 lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
8021 lapic_irq.level = 0;
8022 lapic_irq.dest_id = apicid;
8023 lapic_irq.msi_redir_hint = false;
8024
8025 lapic_irq.delivery_mode = APIC_DM_REMRD;
8026 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
8027}
8028
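/* APICv is active for a VM only while no inhibit reason is set. */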
8029bool kvm_apicv_activated(struct kvm *kvm)
8030{
8031 return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
8032}
8033EXPORT_SYMBOL_GPL(kvm_apicv_activated);
8034
8035void kvm_apicv_init(struct kvm *kvm, bool enable)
8036{
8037 if (enable)
8038 clear_bit(APICV_INHIBIT_REASON_DISABLE,
8039 &kvm->arch.apicv_inhibit_reasons);
8040 else
8041 set_bit(APICV_INHIBIT_REASON_DISABLE,
8042 &kvm->arch.apicv_inhibit_reasons);
8043}
8044EXPORT_SYMBOL_GPL(kvm_apicv_init);
8045
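/*
 * Directed yield used by the kick and sched_yield hypercalls: resolve the
 * destination vCPU by APIC ID via the physical APIC map and yield to it if
 * it is ready to run.
 */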
8046static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
8047{
8048 struct kvm_vcpu *target = NULL;
8049 struct kvm_apic_map *map;
8050
8051 rcu_read_lock();
8052 map = rcu_dereference(kvm->arch.apic_map);
8053
8054 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
8055 target = map->phys_map[dest_id]->vcpu;
8056
8057 rcu_read_unlock();
8058
8059 if (target && READ_ONCE(target->ready))
8060 kvm_vcpu_yield_to(target);
8061}
8062
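/*
 * KVM hypercall ABI: the hypercall number is passed in RAX and up to four
 * arguments in RBX, RCX, RDX and RSI; the return value goes back in RAX.
 * Outside 64-bit mode all of these are truncated to 32 bits.
 */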
8063int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
8064{
8065 unsigned long nr, a0, a1, a2, a3, ret;
8066 int op_64_bit;
8067
8068 if (kvm_hv_hypercall_enabled(vcpu->kvm))
8069 return kvm_hv_hypercall(vcpu);
8070
8071 nr = kvm_rax_read(vcpu);
8072 a0 = kvm_rbx_read(vcpu);
8073 a1 = kvm_rcx_read(vcpu);
8074 a2 = kvm_rdx_read(vcpu);
8075 a3 = kvm_rsi_read(vcpu);
8076
8077 trace_kvm_hypercall(nr, a0, a1, a2, a3);
8078
8079 op_64_bit = is_64_bit_mode(vcpu);
8080 if (!op_64_bit) {
8081 nr &= 0xFFFFFFFF;
8082 a0 &= 0xFFFFFFFF;
8083 a1 &= 0xFFFFFFFF;
8084 a2 &= 0xFFFFFFFF;
8085 a3 &= 0xFFFFFFFF;
8086 }
8087
8088 if (kvm_x86_ops.get_cpl(vcpu) != 0) {
8089 ret = -KVM_EPERM;
8090 goto out;
8091 }
8092
8093 ret = -KVM_ENOSYS;
8094
8095 switch (nr) {
8096 case KVM_HC_VAPIC_POLL_IRQ:
8097 ret = 0;
8098 break;
8099 case KVM_HC_KICK_CPU:
8100 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
8101 break;
8102
8103 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
8104 kvm_sched_yield(vcpu->kvm, a1);
8105 ret = 0;
8106 break;
8107#ifdef CONFIG_X86_64
8108 case KVM_HC_CLOCK_PAIRING:
8109 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
8110 break;
8111#endif
8112 case KVM_HC_SEND_IPI:
8113 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
8114 break;
8115
8116 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
8117 break;
8118 case KVM_HC_SCHED_YIELD:
8119 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
8120 break;
8121
8122 kvm_sched_yield(vcpu->kvm, a0);
8123 ret = 0;
8124 break;
8125 default:
8126 ret = -KVM_ENOSYS;
8127 break;
8128 }
8129out:
8130 if (!op_64_bit)
8131 ret = (u32)ret;
8132 kvm_rax_write(vcpu, ret);
8133
8134 ++vcpu->stat.hypercalls;
8135 return kvm_skip_emulated_instruction(vcpu);
8136}
8137EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
8138
8139static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
8140{
8141 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
8142 char instruction[3];
8143 unsigned long rip = kvm_rip_read(vcpu);
8144
8145 kvm_x86_ops.patch_hypercall(vcpu, instruction);
8146
8147 return emulator_write_emulated(ctxt, rip, instruction, 3,
8148 &ctxt->exception);
8149}
8150
8151static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
8152{
8153 return vcpu->run->request_interrupt_window &&
8154 likely(!pic_in_kernel(vcpu->kvm));
8155}
8156
8157static void post_kvm_run_save(struct kvm_vcpu *vcpu)
8158{
8159 struct kvm_run *kvm_run = vcpu->run;
8160
8161 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
8162 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
8163 kvm_run->cr8 = kvm_get_cr8(vcpu);
8164 kvm_run->apic_base = kvm_get_apic_base(vcpu);
8165 kvm_run->ready_for_interrupt_injection =
8166 pic_in_kernel(vcpu->kvm) ||
8167 kvm_vcpu_ready_for_interrupt_injection(vcpu);
8168}
8169
8170static void update_cr8_intercept(struct kvm_vcpu *vcpu)
8171{
8172 int max_irr, tpr;
8173
8174 if (!kvm_x86_ops.update_cr8_intercept)
8175 return;
8176
8177 if (!lapic_in_kernel(vcpu))
8178 return;
8179
8180 if (vcpu->arch.apicv_active)
8181 return;
8182
8183 if (!vcpu->arch.apic->vapic_addr)
8184 max_irr = kvm_lapic_find_highest_irr(vcpu);
8185 else
8186 max_irr = -1;
8187
8188 if (max_irr != -1)
8189 max_irr >>= 4;
8190
8191 tpr = kvm_lapic_get_cr8(vcpu);
8192
8193 kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
8194}
8195
8196static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
8197{
8198 int r;
8199 bool can_inject = true;
8200
 /* First, re-inject any previously injected event that was not delivered. */
8203 if (vcpu->arch.exception.injected) {
8204 kvm_x86_ops.queue_exception(vcpu);
8205 can_inject = false;
8206 }
8207
 /*
  * Do not inject a new NMI or interrupt while an exception is pending:
  * exceptions are recognized during an instruction, whereas NMIs and IRQs
  * are only recognized at instruction boundaries, so the pending
  * exception has priority and must be delivered first.
  */
8221 else if (!vcpu->arch.exception.pending) {
8222 if (vcpu->arch.nmi_injected) {
8223 kvm_x86_ops.set_nmi(vcpu);
8224 can_inject = false;
8225 } else if (vcpu->arch.interrupt.injected) {
8226 kvm_x86_ops.set_irq(vcpu);
8227 can_inject = false;
8228 }
8229 }
8230
8231 WARN_ON_ONCE(vcpu->arch.exception.injected &&
8232 vcpu->arch.exception.pending);
8233
 /*
  * While in guest mode, give the nested hypervisor a chance to react to
  * pending events first; this may trigger a nested VM-exit instead of
  * direct injection into L2.
  */
8240 if (is_guest_mode(vcpu)) {
8241 r = kvm_x86_ops.nested_ops->check_events(vcpu);
8242 if (r < 0)
8243 goto busy;
8244 }
8245
8246
8247 if (vcpu->arch.exception.pending) {
8248 trace_kvm_inj_exception(vcpu->arch.exception.nr,
8249 vcpu->arch.exception.has_error_code,
8250 vcpu->arch.exception.error_code);
8251
8252 vcpu->arch.exception.pending = false;
8253 vcpu->arch.exception.injected = true;
8254
8255 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
8256 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
8257 X86_EFLAGS_RF);
8258
8259 if (vcpu->arch.exception.nr == DB_VECTOR) {
8260 kvm_deliver_exception_payload(vcpu);
8261 if (vcpu->arch.dr7 & DR7_GD) {
8262 vcpu->arch.dr7 &= ~DR7_GD;
8263 kvm_update_dr7(vcpu);
8264 }
8265 }
8266
8267 kvm_x86_ops.queue_exception(vcpu);
8268 can_inject = false;
8269 }
8270
 /*
  * Now deliver new events: SMIs first, then NMIs, then interrupts.  If an
  * event cannot be injected because the corresponding window is closed,
  * ask vendor code to open a window (request an exit when injection
  * becomes possible) instead of waiting here.
  */
8282 if (vcpu->arch.smi_pending) {
8283 r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY;
8284 if (r < 0)
8285 goto busy;
8286 if (r) {
8287 vcpu->arch.smi_pending = false;
8288 ++vcpu->arch.smi_count;
8289 enter_smm(vcpu);
8290 can_inject = false;
8291 } else
8292 kvm_x86_ops.enable_smi_window(vcpu);
8293 }
8294
8295 if (vcpu->arch.nmi_pending) {
8296 r = can_inject ? kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY;
8297 if (r < 0)
8298 goto busy;
8299 if (r) {
8300 --vcpu->arch.nmi_pending;
8301 vcpu->arch.nmi_injected = true;
8302 kvm_x86_ops.set_nmi(vcpu);
8303 can_inject = false;
8304 WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0);
8305 }
8306 if (vcpu->arch.nmi_pending)
8307 kvm_x86_ops.enable_nmi_window(vcpu);
8308 }
8309
8310 if (kvm_cpu_has_injectable_intr(vcpu)) {
8311 r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY;
8312 if (r < 0)
8313 goto busy;
8314 if (r) {
8315 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
8316 kvm_x86_ops.set_irq(vcpu);
8317 WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0);
8318 }
8319 if (kvm_cpu_has_injectable_intr(vcpu))
8320 kvm_x86_ops.enable_irq_window(vcpu);
8321 }
8322
8323 if (is_guest_mode(vcpu) &&
8324 kvm_x86_ops.nested_ops->hv_timer_pending &&
8325 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
8326 *req_immediate_exit = true;
8327
8328 WARN_ON(vcpu->arch.exception.pending);
8329 return;
8330
8331busy:
8332 *req_immediate_exit = true;
8333 return;
8334}
8335
8336static void process_nmi(struct kvm_vcpu *vcpu)
8337{
8338 unsigned limit = 2;
8339
 /*
  * x86 can have one NMI in service and at most one more pending.  If an
  * NMI is already in progress or masked, limit the queue to a single
  * pending NMI, otherwise allow two.
  */
8345 if (kvm_x86_ops.get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
8346 limit = 1;
8347
8348 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
8349 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
8350 kvm_make_request(KVM_REQ_EVENT, vcpu);
8351}
8352
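/*
 * Pack segment attributes into the access-rights layout used by the SMM
 * state-save area: type in bits 8-11, S at 12, DPL at 13-14, P at 15, AVL
 * at 20, L at 21, D/B at 22 and G at 23.
 */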
8353static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
8354{
8355 u32 flags = 0;
8356 flags |= seg->g << 23;
8357 flags |= seg->db << 22;
8358 flags |= seg->l << 21;
8359 flags |= seg->avl << 20;
8360 flags |= seg->present << 15;
8361 flags |= seg->dpl << 13;
8362 flags |= seg->s << 12;
8363 flags |= seg->type << 8;
8364 return flags;
8365}
8366
8367static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
8368{
8369 struct kvm_segment seg;
8370 int offset;
8371
8372 kvm_get_segment(vcpu, &seg, n);
8373 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
8374
8375 if (n < 3)
8376 offset = 0x7f84 + n * 12;
8377 else
8378 offset = 0x7f2c + (n - 3) * 12;
8379
8380 put_smstate(u32, buf, offset + 8, seg.base);
8381 put_smstate(u32, buf, offset + 4, seg.limit);
8382 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
8383}
8384
8385#ifdef CONFIG_X86_64
8386static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
8387{
8388 struct kvm_segment seg;
8389 int offset;
8390 u16 flags;
8391
8392 kvm_get_segment(vcpu, &seg, n);
8393 offset = 0x7e00 + n * 16;
8394
8395 flags = enter_smm_get_segment_flags(&seg) >> 8;
8396 put_smstate(u16, buf, offset, seg.selector);
8397 put_smstate(u16, buf, offset + 2, flags);
8398 put_smstate(u32, buf, offset + 4, seg.limit);
8399 put_smstate(u64, buf, offset + 8, seg.base);
8400}
8401#endif
8402
8403static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
8404{
8405 struct desc_ptr dt;
8406 struct kvm_segment seg;
8407 unsigned long val;
8408 int i;
8409
8410 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
8411 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
8412 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
8413 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
8414
8415 for (i = 0; i < 8; i++)
8416 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
8417
8418 kvm_get_dr(vcpu, 6, &val);
8419 put_smstate(u32, buf, 0x7fcc, (u32)val);
8420 kvm_get_dr(vcpu, 7, &val);
8421 put_smstate(u32, buf, 0x7fc8, (u32)val);
8422
8423 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
8424 put_smstate(u32, buf, 0x7fc4, seg.selector);
8425 put_smstate(u32, buf, 0x7f64, seg.base);
8426 put_smstate(u32, buf, 0x7f60, seg.limit);
8427 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
8428
8429 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
8430 put_smstate(u32, buf, 0x7fc0, seg.selector);
8431 put_smstate(u32, buf, 0x7f80, seg.base);
8432 put_smstate(u32, buf, 0x7f7c, seg.limit);
8433 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
8434
8435 kvm_x86_ops.get_gdt(vcpu, &dt);
8436 put_smstate(u32, buf, 0x7f74, dt.address);
8437 put_smstate(u32, buf, 0x7f70, dt.size);
8438
8439 kvm_x86_ops.get_idt(vcpu, &dt);
8440 put_smstate(u32, buf, 0x7f58, dt.address);
8441 put_smstate(u32, buf, 0x7f54, dt.size);
8442
8443 for (i = 0; i < 6; i++)
8444 enter_smm_save_seg_32(vcpu, buf, i);
8445
8446 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
8447
 /* SMM revision identifier. */
8449 put_smstate(u32, buf, 0x7efc, 0x00020000);
8450 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
8451}
8452
8453#ifdef CONFIG_X86_64
8454static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
8455{
8456 struct desc_ptr dt;
8457 struct kvm_segment seg;
8458 unsigned long val;
8459 int i;
8460
8461 for (i = 0; i < 16; i++)
8462 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
8463
8464 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
8465 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
8466
8467 kvm_get_dr(vcpu, 6, &val);
8468 put_smstate(u64, buf, 0x7f68, val);
8469 kvm_get_dr(vcpu, 7, &val);
8470 put_smstate(u64, buf, 0x7f60, val);
8471
8472 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
8473 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
8474 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
8475
8476 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
8477
 /* SMM revision identifier (64-bit capable). */
8479 put_smstate(u32, buf, 0x7efc, 0x00020064);
8480
8481 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
8482
8483 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
8484 put_smstate(u16, buf, 0x7e90, seg.selector);
8485 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
8486 put_smstate(u32, buf, 0x7e94, seg.limit);
8487 put_smstate(u64, buf, 0x7e98, seg.base);
8488
8489 kvm_x86_ops.get_idt(vcpu, &dt);
8490 put_smstate(u32, buf, 0x7e84, dt.size);
8491 put_smstate(u64, buf, 0x7e88, dt.address);
8492
8493 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
8494 put_smstate(u16, buf, 0x7e70, seg.selector);
8495 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
8496 put_smstate(u32, buf, 0x7e74, seg.limit);
8497 put_smstate(u64, buf, 0x7e78, seg.base);
8498
8499 kvm_x86_ops.get_gdt(vcpu, &dt);
8500 put_smstate(u32, buf, 0x7e64, dt.size);
8501 put_smstate(u64, buf, 0x7e68, dt.address);
8502
8503 for (i = 0; i < 6; i++)
8504 enter_smm_save_seg_64(vcpu, buf, i);
8505}
8506#endif
8507
8508static void enter_smm(struct kvm_vcpu *vcpu)
8509{
8510 struct kvm_segment cs, ds;
8511 struct desc_ptr dt;
8512 char buf[512];
8513 u32 cr0;
8514
8515 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
8516 memset(buf, 0, 512);
8517#ifdef CONFIG_X86_64
8518 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
8519 enter_smm_save_state_64(vcpu, buf);
8520 else
8521#endif
8522 enter_smm_save_state_32(vcpu, buf);
8523
 /*
  * Give pre_enter_smm() a chance to make ISA-specific changes to the vCPU
  * state (e.g. leave nested guest mode) now that the state has been saved
  * into the SMM state-save area.
  */
8529 kvm_x86_ops.pre_enter_smm(vcpu, buf);
8530
8531 vcpu->arch.hflags |= HF_SMM_MASK;
8532 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
8533
8534 if (kvm_x86_ops.get_nmi_mask(vcpu))
8535 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
8536 else
8537 kvm_x86_ops.set_nmi_mask(vcpu, true);
8538
8539 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
8540 kvm_rip_write(vcpu, 0x8000);
8541
8542 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
8543 kvm_x86_ops.set_cr0(vcpu, cr0);
8544 vcpu->arch.cr0 = cr0;
8545
8546 kvm_x86_ops.set_cr4(vcpu, 0);
8547
 /* The IDT limit is zeroed on entry to SMM. */
8549 dt.address = dt.size = 0;
8550 kvm_x86_ops.set_idt(vcpu, &dt);
8551
8552 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
8553
8554 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
8555 cs.base = vcpu->arch.smbase;
8556
8557 ds.selector = 0;
8558 ds.base = 0;
8559
8560 cs.limit = ds.limit = 0xffffffff;
8561 cs.type = ds.type = 0x3;
8562 cs.dpl = ds.dpl = 0;
8563 cs.db = ds.db = 0;
8564 cs.s = ds.s = 1;
8565 cs.l = ds.l = 0;
8566 cs.g = ds.g = 1;
8567 cs.avl = ds.avl = 0;
8568 cs.present = ds.present = 1;
8569 cs.unusable = ds.unusable = 0;
8570 cs.padding = ds.padding = 0;
8571
8572 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
8573 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
8574 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
8575 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
8576 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
8577 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
8578
8579#ifdef CONFIG_X86_64
8580 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
8581 kvm_x86_ops.set_efer(vcpu, 0);
8582#endif
8583
8584 kvm_update_cpuid_runtime(vcpu);
8585 kvm_mmu_reset_context(vcpu);
8586}
8587
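/* Latch an SMI; it is actually delivered via inject_pending_event(). */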
8588static void process_smi(struct kvm_vcpu *vcpu)
8589{
8590 vcpu->arch.smi_pending = true;
8591 kvm_make_request(KVM_REQ_EVENT, vcpu);
8592}
8593
8594void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
8595 unsigned long *vcpu_bitmap)
8596{
8597 cpumask_var_t cpus;
8598
8599 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
8600
8601 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
8602 NULL, vcpu_bitmap, cpus);
8603
8604 free_cpumask_var(cpus);
8605}
8606
8607void kvm_make_scan_ioapic_request(struct kvm *kvm)
8608{
8609 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
8610}
8611
8612void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
8613{
8614 if (!lapic_in_kernel(vcpu))
8615 return;
8616
8617 vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
8618 kvm_apic_update_apicv(vcpu);
8619 kvm_x86_ops.refresh_apicv_exec_ctrl(vcpu);
8620}
8621EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
8622
/*
 * Toggle an APICv inhibit reason for the VM and propagate the resulting
 * activate/deactivate decision to every vCPU.
 */
8630void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
8631{
8632 struct kvm_vcpu *except;
8633 unsigned long old, new, expected;
8634
8635 if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
8636 !kvm_x86_ops.check_apicv_inhibit_reasons(bit))
8637 return;
8638
8639 old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
8640 do {
8641 expected = new = old;
8642 if (activate)
8643 __clear_bit(bit, &new);
8644 else
8645 __set_bit(bit, &new);
8646 if (new == old)
8647 break;
8648 old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
8649 } while (old != expected);
8650
8651 if (!!old == !!new)
8652 return;
8653
8654 trace_kvm_apicv_update_request(activate, bit);
8655 if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
8656 kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
8657
 /*
  * Send the update request to all other vCPUs, but update the calling
  * vCPU directly instead of waiting for its next VM-exit to process the
  * request.
  */
8663 except = kvm_get_running_vcpu();
8664 kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
8665 except);
8666 if (except)
8667 kvm_vcpu_update_apicv(except);
8668}
8669EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
8670
8671static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
8672{
8673 if (!kvm_apic_present(vcpu))
8674 return;
8675
8676 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
8677
8678 if (irqchip_split(vcpu->kvm))
8679 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
8680 else {
8681 if (vcpu->arch.apicv_active)
8682 kvm_x86_ops.sync_pir_to_irr(vcpu);
8683 if (ioapic_in_kernel(vcpu->kvm))
8684 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
8685 }
8686
8687 if (is_guest_mode(vcpu))
8688 vcpu->arch.load_eoi_exitmap_pending = true;
8689 else
8690 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
8691}
8692
8693static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
8694{
8695 u64 eoi_exit_bitmap[4];
8696
8697 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
8698 return;
8699
8700 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
8701 vcpu_to_synic(vcpu)->vec_bitmap, 256);
8702 kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap);
8703}
8704
8705void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
8706 unsigned long start, unsigned long end)
8707{
8708 unsigned long apic_address;
8709
 /*
  * The physical address of the APIC access page is cached by the vendor
  * module (e.g. in the VMCS); ask all vCPUs to reload it when the page is
  * invalidated.
  */
8714 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
8715 if (start <= apic_address && apic_address < end)
8716 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
8717}
8718
8719void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
8720{
8721 if (!lapic_in_kernel(vcpu))
8722 return;
8723
8724 if (!kvm_x86_ops.set_apic_access_page_addr)
8725 return;
8726
8727 kvm_x86_ops.set_apic_access_page_addr(vcpu);
8728}
8729
8730void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
8731{
8732 smp_send_reschedule(vcpu->cpu);
8733}
8734EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
8735
/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to userspace; otherwise the value is propagated to userspace.
 */
8741static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
8742{
8743 int r;
8744 bool req_int_win =
8745 dm_request_for_irq_injection(vcpu) &&
8746 kvm_cpu_accept_dm_intr(vcpu);
8747 fastpath_t exit_fastpath;
8748
8749 bool req_immediate_exit = false;
8750
8751 if (kvm_request_pending(vcpu)) {
8752 if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
8753 if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
8754 r = 0;
8755 goto out;
8756 }
8757 }
8758 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
8759 kvm_mmu_unload(vcpu);
8760 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
8761 __kvm_migrate_timers(vcpu);
8762 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
8763 kvm_gen_update_masterclock(vcpu->kvm);
8764 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
8765 kvm_gen_kvmclock_update(vcpu);
8766 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
8767 r = kvm_guest_time_update(vcpu);
8768 if (unlikely(r))
8769 goto out;
8770 }
8771 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
8772 kvm_mmu_sync_roots(vcpu);
8773 if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
8774 kvm_mmu_load_pgd(vcpu);
8775 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
8776 kvm_vcpu_flush_tlb_all(vcpu);
8777
 /* Flushing all ASIDs also flushes the current ASID, so clear that request. */
8779 kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
8780 }
8781 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
8782 kvm_vcpu_flush_tlb_current(vcpu);
8783 if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
8784 kvm_vcpu_flush_tlb_guest(vcpu);
8785
8786 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
8787 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
8788 r = 0;
8789 goto out;
8790 }
8791 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
8792 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
8793 vcpu->mmio_needed = 0;
8794 r = 0;
8795 goto out;
8796 }
8797 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
 /* The page is swapped out; halt the vCPU until it is available again. */
8799 vcpu->arch.apf.halted = true;
8800 r = 1;
8801 goto out;
8802 }
8803 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
8804 record_steal_time(vcpu);
8805 if (kvm_check_request(KVM_REQ_SMI, vcpu))
8806 process_smi(vcpu);
8807 if (kvm_check_request(KVM_REQ_NMI, vcpu))
8808 process_nmi(vcpu);
8809 if (kvm_check_request(KVM_REQ_PMU, vcpu))
8810 kvm_pmu_handle_event(vcpu);
8811 if (kvm_check_request(KVM_REQ_PMI, vcpu))
8812 kvm_pmu_deliver_pmi(vcpu);
8813 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
8814 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
8815 if (test_bit(vcpu->arch.pending_ioapic_eoi,
8816 vcpu->arch.ioapic_handled_vectors)) {
8817 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
8818 vcpu->run->eoi.vector =
8819 vcpu->arch.pending_ioapic_eoi;
8820 r = 0;
8821 goto out;
8822 }
8823 }
8824 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
8825 vcpu_scan_ioapic(vcpu);
8826 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
8827 vcpu_load_eoi_exitmap(vcpu);
8828 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
8829 kvm_vcpu_reload_apic_access_page(vcpu);
8830 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
8831 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8832 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
8833 r = 0;
8834 goto out;
8835 }
8836 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
8837 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8838 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
8839 r = 0;
8840 goto out;
8841 }
8842 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
8843 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
8844 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
8845 r = 0;
8846 goto out;
8847 }
8848
8849
8850
8851
8852
8853
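/*
 * KVM_REQ_HV_STIMER has to be processed after KVM_REQ_CLOCK_UPDATE,
 * because Hyper-V SynIC timers depend on the guest clock being
 * up-to-date.
 */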
8854 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
8855 kvm_hv_process_stimers(vcpu);
8856 if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
8857 kvm_vcpu_update_apicv(vcpu);
8858 if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
8859 kvm_check_async_pf_completion(vcpu);
8860 if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
8861 kvm_x86_ops.msr_filter_changed(vcpu);
8862 }
8863
8864 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
8865 ++vcpu->stat.req_event;
8866 kvm_apic_accept_events(vcpu);
8867 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
8868 r = 1;
8869 goto out;
8870 }
8871
8872 inject_pending_event(vcpu, &req_immediate_exit);
8873 if (req_int_win)
8874 kvm_x86_ops.enable_irq_window(vcpu);
8875
8876 if (kvm_lapic_enabled(vcpu)) {
8877 update_cr8_intercept(vcpu);
8878 kvm_lapic_sync_to_vapic(vcpu);
8879 }
8880 }
8881
8882 r = kvm_mmu_reload(vcpu);
8883 if (unlikely(r)) {
8884 goto cancel_injection;
8885 }
8886
8887 preempt_disable();
8888
8889 kvm_x86_ops.prepare_guest_switch(vcpu);
8890
8891
8892
8893
8894
8895
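/*
 * Disable IRQs before setting IN_GUEST_MODE.  Posted-interrupt IPIs are
 * then delayed until after guest entry, which ensures that they result
 * in virtual interrupt delivery.
 */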
8896 local_irq_disable();
8897 vcpu->mode = IN_GUEST_MODE;
8898
8899 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8900
8901
8902
8903
8904
8905
8906
8907
8908
8909
8910
8911
8912
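/*
 * vcpu->mode must be written before the request and posted-interrupt
 * checks below; this barrier (together with the SRCU unlock above)
 * pairs with the checks done by remote CPUs in kvm_vcpu_kick(), so a
 * concurrent request or posted interrupt is not missed.
 */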
8913 smp_mb__after_srcu_read_unlock();
8914
8915
8916
8917
8918
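/*
 * Pick up any posted interrupt that was notified with a plain
 * kvm_vcpu_kick() rather than a posted-interrupt IPI.
 */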
8919 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
8920 kvm_x86_ops.sync_pir_to_irr(vcpu);
8921
8922 if (kvm_vcpu_exit_request(vcpu)) {
8923 vcpu->mode = OUTSIDE_GUEST_MODE;
8924 smp_wmb();
8925 local_irq_enable();
8926 preempt_enable();
8927 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8928 r = 1;
8929 goto cancel_injection;
8930 }
8931
8932 if (req_immediate_exit) {
8933 kvm_make_request(KVM_REQ_EVENT, vcpu);
8934 kvm_x86_ops.request_immediate_exit(vcpu);
8935 }
8936
8937 trace_kvm_entry(vcpu);
8938
8939 fpregs_assert_state_consistent();
8940 if (test_thread_flag(TIF_NEED_FPU_LOAD))
8941 switch_fpu_return();
8942
8943 if (unlikely(vcpu->arch.switch_db_regs)) {
8944 set_debugreg(0, 7);
8945 set_debugreg(vcpu->arch.eff_db[0], 0);
8946 set_debugreg(vcpu->arch.eff_db[1], 1);
8947 set_debugreg(vcpu->arch.eff_db[2], 2);
8948 set_debugreg(vcpu->arch.eff_db[3], 3);
8949 set_debugreg(vcpu->arch.dr6, 6);
8950 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8951 }
8952
8953 exit_fastpath = kvm_x86_ops.run(vcpu);
8954
8955
8956
8957
8958
8959
8960
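/*
 * Sync the guest's debug registers back out before the host values are
 * restored below, so that a debug-register access exit handled later
 * still sees what the guest actually wrote.
 */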
8961 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
8962 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
8963 kvm_x86_ops.sync_dirty_debug_regs(vcpu);
8964 kvm_update_dr0123(vcpu);
8965 kvm_update_dr7(vcpu);
8966 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8967 }
8968
8969
8970
8971
8972
8973
8974
8975
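/*
 * The guest's debug registers were loaded before entry; restore the
 * host's hardware breakpoints if any are in use.
 */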
8976 if (hw_breakpoint_active())
8977 hw_breakpoint_restore();
8978
8979 vcpu->arch.last_vmentry_cpu = vcpu->cpu;
8980 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
8981
8982 vcpu->mode = OUTSIDE_GUEST_MODE;
8983 smp_wmb();
8984
8985 kvm_x86_ops.handle_exit_irqoff(vcpu);
8986
8987
8988
8989
8990
8991
8992
8993
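/*
 * Consume any pending interrupts, including the one that may have caused
 * this VM-exit, by briefly enabling IRQs.  The stat.exits increment
 * provides the instruction needed after local_irq_enable() to clear a
 * possible interrupt shadow.
 */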
8994 kvm_before_interrupt(vcpu);
8995 local_irq_enable();
8996 ++vcpu->stat.exits;
8997 local_irq_disable();
8998 kvm_after_interrupt(vcpu);
8999
9000 if (lapic_in_kernel(vcpu)) {
9001 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
9002 if (delta != S64_MIN) {
9003 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
9004 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
9005 }
9006 }
9007
9008 local_irq_enable();
9009 preempt_enable();
9010
9011 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9012
9013
9014
9015
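/* Profile KVM exit RIPs. */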
9016 if (unlikely(prof_on == KVM_PROFILING)) {
9017 unsigned long rip = kvm_rip_read(vcpu);
9018 profile_hit(KVM_PROFILING, (void *)rip);
9019 }
9020
9021 if (unlikely(vcpu->arch.tsc_always_catchup))
9022 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
9023
9024 if (vcpu->arch.apic_attention)
9025 kvm_lapic_sync_from_vapic(vcpu);
9026
9027 r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath);
9028 return r;
9029
9030cancel_injection:
9031 if (req_immediate_exit)
9032 kvm_make_request(KVM_REQ_EVENT, vcpu);
9033 kvm_x86_ops.cancel_injection(vcpu);
9034 if (unlikely(vcpu->arch.apic_attention))
9035 kvm_lapic_sync_from_vapic(vcpu);
9036out:
9037 return r;
9038}
9039
9040static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
9041{
9042 if (!kvm_arch_vcpu_runnable(vcpu) &&
9043 (!kvm_x86_ops.pre_block || kvm_x86_ops.pre_block(vcpu) == 0)) {
9044 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9045 kvm_vcpu_block(vcpu);
9046 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
9047
9048 if (kvm_x86_ops.post_block)
9049 kvm_x86_ops.post_block(vcpu);
9050
9051 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
9052 return 1;
9053 }
9054
9055 kvm_apic_accept_events(vcpu);
9056 switch(vcpu->arch.mp_state) {
9057 case KVM_MP_STATE_HALTED:
9058 vcpu->arch.pv.pv_unhalted = false;
9059 vcpu->arch.mp_state =
9060 KVM_MP_STATE_RUNNABLE;
9061 fallthrough;
9062 case KVM_MP_STATE_RUNNABLE:
9063 vcpu->arch.apf.halted = false;
9064 break;
9065 case KVM_MP_STATE_INIT_RECEIVED:
9066 break;
9067 default:
9068 return -EINTR;
9069 }
9070 return 1;
9071}
9072
9073static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
9074{
9075 if (is_guest_mode(vcpu))
9076 kvm_x86_ops.nested_ops->check_events(vcpu);
9077
9078 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
9079 !vcpu->arch.apf.halted);
9080}
9081
9082static int vcpu_run(struct kvm_vcpu *vcpu)
9083{
9084 int r;
9085 struct kvm *kvm = vcpu->kvm;
9086
9087 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
9088 vcpu->arch.l1tf_flush_l1d = true;
9089
9090 for (;;) {
9091 if (kvm_vcpu_running(vcpu)) {
9092 r = vcpu_enter_guest(vcpu);
9093 } else {
9094 r = vcpu_block(kvm, vcpu);
9095 }
9096
9097 if (r <= 0)
9098 break;
9099
9100 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
9101 if (kvm_cpu_has_pending_timer(vcpu))
9102 kvm_inject_pending_timer_irqs(vcpu);
9103
9104 if (dm_request_for_irq_injection(vcpu) &&
9105 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
9106 r = 0;
9107 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
9108 ++vcpu->stat.request_irq_exits;
9109 break;
9110 }
9111
9112 if (__xfer_to_guest_mode_work_pending()) {
9113 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9114 r = xfer_to_guest_mode_handle_work(vcpu);
9115 if (r)
9116 return r;
9117 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
9118 }
9119 }
9120
9121 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
9122
9123 return r;
9124}
9125
9126static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
9127{
9128 int r;
9129
9130 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
9131 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
9132 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
9133 return r;
9134}
9135
9136static int complete_emulated_pio(struct kvm_vcpu *vcpu)
9137{
9138 BUG_ON(!vcpu->arch.pio.count);
9139
9140 return complete_emulated_io(vcpu);
9141}
9142
9143
9144
9145
9146
9147
9148
9149
9150
9151
9152
9153
9154
9155
9156
9157
9158
9159
9160
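/*
 * Complete a userspace MMIO exit.  A wide access is split into fragments
 * and each fragment is handled in pieces of at most 8 bytes, with one
 * KVM_EXIT_MMIO round trip per piece.  For reads the data returned by
 * userspace is copied into the current fragment; once every fragment is
 * done, a read re-enters the emulator to finish the instruction while a
 * write simply resumes the guest.
 */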
9161static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
9162{
9163 struct kvm_run *run = vcpu->run;
9164 struct kvm_mmio_fragment *frag;
9165 unsigned len;
9166
9167 BUG_ON(!vcpu->mmio_needed);
9168
9169
9170 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
9171 len = min(8u, frag->len);
9172 if (!vcpu->mmio_is_write)
9173 memcpy(frag->data, run->mmio.data, len);
9174
9175 if (frag->len <= 8) {
9176
9177 frag++;
9178 vcpu->mmio_cur_fragment++;
9179 } else {
9180
9181 frag->data += len;
9182 frag->gpa += len;
9183 frag->len -= len;
9184 }
9185
9186 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
9187 vcpu->mmio_needed = 0;
9188
9189
9190 if (vcpu->mmio_is_write)
9191 return 1;
9192 vcpu->mmio_read_completed = 1;
9193 return complete_emulated_io(vcpu);
9194 }
9195
9196 run->exit_reason = KVM_EXIT_MMIO;
9197 run->mmio.phys_addr = frag->gpa;
9198 if (vcpu->mmio_is_write)
9199 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
9200 run->mmio.len = min(8u, frag->len);
9201 run->mmio.is_write = vcpu->mmio_is_write;
9202 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
9203 return 0;
9204}
9205
9206static void kvm_save_current_fpu(struct fpu *fpu)
9207{
9208
9209
9210
9211
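/*
 * If the FPU registers are not currently loaded for this task, just copy
 * the saved state from current; otherwise save the live register state
 * directly into the target buffer.
 */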
9212 if (test_thread_flag(TIF_NEED_FPU_LOAD))
9213 memcpy(&fpu->state, &current->thread.fpu.state,
9214 fpu_kernel_xstate_size);
9215 else
9216 copy_fpregs_to_fpstate(fpu);
9217}
9218
9219
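/* Swap the userspace FPU context for the guest FPU context. */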
9220static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
9221{
9222 fpregs_lock();
9223
9224 kvm_save_current_fpu(vcpu->arch.user_fpu);
9225
9226
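/* PKRU is excluded from the mask; it is switched separately around VM entry. */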
9227 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
9228 ~XFEATURE_MASK_PKRU);
9229
9230 fpregs_mark_activate();
9231 fpregs_unlock();
9232
9233 trace_kvm_fpu(1);
9234}
9235
9236
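/* When vcpu_run ends, restore the userspace FPU context. */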
9237static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
9238{
9239 fpregs_lock();
9240
9241 kvm_save_current_fpu(vcpu->arch.guest_fpu);
9242
9243 copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
9244
9245 fpregs_mark_activate();
9246 fpregs_unlock();
9247
9248 ++vcpu->stat.fpu_reload;
9249 trace_kvm_fpu(0);
9250}
9251
9252int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
9253{
9254 struct kvm_run *kvm_run = vcpu->run;
9255 int r;
9256
9257 vcpu_load(vcpu);
9258 kvm_sigset_activate(vcpu);
9259 kvm_load_guest_fpu(vcpu);
9260
9261 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
9262 if (kvm_run->immediate_exit) {
9263 r = -EINTR;
9264 goto out;
9265 }
9266 kvm_vcpu_block(vcpu);
9267 kvm_apic_accept_events(vcpu);
9268 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
9269 r = -EAGAIN;
9270 if (signal_pending(current)) {
9271 r = -EINTR;
9272 kvm_run->exit_reason = KVM_EXIT_INTR;
9273 ++vcpu->stat.signal_exits;
9274 }
9275 goto out;
9276 }
9277
9278 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
9279 r = -EINVAL;
9280 goto out;
9281 }
9282
9283 if (kvm_run->kvm_dirty_regs) {
9284 r = sync_regs(vcpu);
9285 if (r != 0)
9286 goto out;
9287 }
9288
9289
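/* Without an in-kernel LAPIC, re-sync the TPR (CR8) from userspace. */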
9290 if (!lapic_in_kernel(vcpu)) {
9291 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
9292 r = -EINVAL;
9293 goto out;
9294 }
9295 }
9296
9297 if (unlikely(vcpu->arch.complete_userspace_io)) {
9298 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
9299 vcpu->arch.complete_userspace_io = NULL;
9300 r = cui(vcpu);
9301 if (r <= 0)
9302 goto out;
9303 } else
9304 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
9305
9306 if (kvm_run->immediate_exit)
9307 r = -EINTR;
9308 else
9309 r = vcpu_run(vcpu);
9310
9311out:
9312 kvm_put_guest_fpu(vcpu);
9313 if (kvm_run->kvm_valid_regs)
9314 store_regs(vcpu);
9315 post_kvm_run_save(vcpu);
9316 kvm_sigset_deactivate(vcpu);
9317
9318 vcpu_put(vcpu);
9319 return r;
9320}
9321
9322static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9323{
9324 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
9325
9326
9327
9328
9329
9330
9331
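/*
 * Userspace is reading registers in the middle of instruction emulation,
 * so the emulator's cached registers must be written back to the vCPU
 * first (some PV backdoor interfaces do this).
 */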
9332 emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
9333 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
9334 }
9335 regs->rax = kvm_rax_read(vcpu);
9336 regs->rbx = kvm_rbx_read(vcpu);
9337 regs->rcx = kvm_rcx_read(vcpu);
9338 regs->rdx = kvm_rdx_read(vcpu);
9339 regs->rsi = kvm_rsi_read(vcpu);
9340 regs->rdi = kvm_rdi_read(vcpu);
9341 regs->rsp = kvm_rsp_read(vcpu);
9342 regs->rbp = kvm_rbp_read(vcpu);
9343#ifdef CONFIG_X86_64
9344 regs->r8 = kvm_r8_read(vcpu);
9345 regs->r9 = kvm_r9_read(vcpu);
9346 regs->r10 = kvm_r10_read(vcpu);
9347 regs->r11 = kvm_r11_read(vcpu);
9348 regs->r12 = kvm_r12_read(vcpu);
9349 regs->r13 = kvm_r13_read(vcpu);
9350 regs->r14 = kvm_r14_read(vcpu);
9351 regs->r15 = kvm_r15_read(vcpu);
9352#endif
9353
9354 regs->rip = kvm_rip_read(vcpu);
9355 regs->rflags = kvm_get_rflags(vcpu);
9356}
9357
9358int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9359{
9360 vcpu_load(vcpu);
9361 __get_regs(vcpu, regs);
9362 vcpu_put(vcpu);
9363 return 0;
9364}
9365
9366static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9367{
9368 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
9369 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
9370
9371 kvm_rax_write(vcpu, regs->rax);
9372 kvm_rbx_write(vcpu, regs->rbx);
9373 kvm_rcx_write(vcpu, regs->rcx);
9374 kvm_rdx_write(vcpu, regs->rdx);
9375 kvm_rsi_write(vcpu, regs->rsi);
9376 kvm_rdi_write(vcpu, regs->rdi);
9377 kvm_rsp_write(vcpu, regs->rsp);
9378 kvm_rbp_write(vcpu, regs->rbp);
9379#ifdef CONFIG_X86_64
9380 kvm_r8_write(vcpu, regs->r8);
9381 kvm_r9_write(vcpu, regs->r9);
9382 kvm_r10_write(vcpu, regs->r10);
9383 kvm_r11_write(vcpu, regs->r11);
9384 kvm_r12_write(vcpu, regs->r12);
9385 kvm_r13_write(vcpu, regs->r13);
9386 kvm_r14_write(vcpu, regs->r14);
9387 kvm_r15_write(vcpu, regs->r15);
9388#endif
9389
9390 kvm_rip_write(vcpu, regs->rip);
9391 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
9392
9393 vcpu->arch.exception.pending = false;
9394
9395 kvm_make_request(KVM_REQ_EVENT, vcpu);
9396}
9397
9398int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9399{
9400 vcpu_load(vcpu);
9401 __set_regs(vcpu, regs);
9402 vcpu_put(vcpu);
9403 return 0;
9404}
9405
9406void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
9407{
9408 struct kvm_segment cs;
9409
9410 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
9411 *db = cs.db;
9412 *l = cs.l;
9413}
9414EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
9415
9416static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
9417{
9418 struct desc_ptr dt;
9419
9420 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
9421 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
9422 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
9423 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
9424 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
9425 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
9426
9427 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
9428 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
9429
9430 kvm_x86_ops.get_idt(vcpu, &dt);
9431 sregs->idt.limit = dt.size;
9432 sregs->idt.base = dt.address;
9433 kvm_x86_ops.get_gdt(vcpu, &dt);
9434 sregs->gdt.limit = dt.size;
9435 sregs->gdt.base = dt.address;
9436
9437 sregs->cr0 = kvm_read_cr0(vcpu);
9438 sregs->cr2 = vcpu->arch.cr2;
9439 sregs->cr3 = kvm_read_cr3(vcpu);
9440 sregs->cr4 = kvm_read_cr4(vcpu);
9441 sregs->cr8 = kvm_get_cr8(vcpu);
9442 sregs->efer = vcpu->arch.efer;
9443 sregs->apic_base = kvm_get_apic_base(vcpu);
9444
9445 memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
9446
9447 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
9448 set_bit(vcpu->arch.interrupt.nr,
9449 (unsigned long *)sregs->interrupt_bitmap);
9450}
9451
9452int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
9453 struct kvm_sregs *sregs)
9454{
9455 vcpu_load(vcpu);
9456 __get_sregs(vcpu, sregs);
9457 vcpu_put(vcpu);
9458 return 0;
9459}
9460
9461int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
9462 struct kvm_mp_state *mp_state)
9463{
9464 vcpu_load(vcpu);
9465 if (kvm_mpx_supported())
9466 kvm_load_guest_fpu(vcpu);
9467
9468 kvm_apic_accept_events(vcpu);
9469 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
9470 vcpu->arch.pv.pv_unhalted)
9471 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
9472 else
9473 mp_state->mp_state = vcpu->arch.mp_state;
9474
9475 if (kvm_mpx_supported())
9476 kvm_put_guest_fpu(vcpu);
9477 vcpu_put(vcpu);
9478 return 0;
9479}
9480
9481int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
9482 struct kvm_mp_state *mp_state)
9483{
9484 int ret = -EINVAL;
9485
9486 vcpu_load(vcpu);
9487
9488 if (!lapic_in_kernel(vcpu) &&
9489 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
9490 goto out;
9491
9492
9493
9494
9495
9496
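/*
 * Reject SIPI_RECEIVED/INIT_RECEIVED while INIT is latched (e.g. the vCPU
 * is in SMM) or an SMI is pending; the vCPU cannot legitimately be in
 * those states at that point.
 */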
9497 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
9498 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
9499 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
9500 goto out;
9501
9502 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
9503 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
9504 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
9505 } else
9506 vcpu->arch.mp_state = mp_state->mp_state;
9507 kvm_make_request(KVM_REQ_EVENT, vcpu);
9508
9509 ret = 0;
9510out:
9511 vcpu_put(vcpu);
9512 return ret;
9513}
9514
9515int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
9516 int reason, bool has_error_code, u32 error_code)
9517{
9518 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
9519 int ret;
9520
9521 init_emulate_ctxt(vcpu);
9522
9523 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
9524 has_error_code, error_code);
9525 if (ret) {
9526 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
9527 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
9528 vcpu->run->internal.ndata = 0;
9529 return 0;
9530 }
9531
9532 kvm_rip_write(vcpu, ctxt->eip);
9533 kvm_set_rflags(vcpu, ctxt->eflags);
9534 return 1;
9535}
9536EXPORT_SYMBOL_GPL(kvm_task_switch);
9537
9538static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
9539{
9540 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
9541
9542
9543
9544
9545
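/*
 * EFER.LME and CR0.PG are both set, i.e. the guest claims to be in
 * long mode: CR4.PAE and EFER.LMA must be set as well.
 */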
9546 if (!(sregs->cr4 & X86_CR4_PAE)
9547 || !(sregs->efer & EFER_LMA))
9548 return -EINVAL;
9549 } else {
9550
9551
9552
9553
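/*
 * Not in long mode: EFER.LMA must be clear and the code segment
 * cannot be a 64-bit one.
 */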
9554 if (sregs->efer & EFER_LMA || sregs->cs.l)
9555 return -EINVAL;
9556 }
9557
9558 return kvm_valid_cr4(vcpu, sregs->cr4);
9559}
9560
9561static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
9562{
9563 struct msr_data apic_base_msr;
9564 int mmu_reset_needed = 0;
9565 int cpuid_update_needed = 0;
9566 int pending_vec, max_bits, idx;
9567 struct desc_ptr dt;
9568 int ret = -EINVAL;
9569
9570 if (kvm_valid_sregs(vcpu, sregs))
9571 goto out;
9572
9573 apic_base_msr.data = sregs->apic_base;
9574 apic_base_msr.host_initiated = true;
9575 if (kvm_set_apic_base(vcpu, &apic_base_msr))
9576 goto out;
9577
9578 dt.size = sregs->idt.limit;
9579 dt.address = sregs->idt.base;
9580 kvm_x86_ops.set_idt(vcpu, &dt);
9581 dt.size = sregs->gdt.limit;
9582 dt.address = sregs->gdt.base;
9583 kvm_x86_ops.set_gdt(vcpu, &dt);
9584
9585 vcpu->arch.cr2 = sregs->cr2;
9586 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
9587 vcpu->arch.cr3 = sregs->cr3;
9588 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
9589
9590 kvm_set_cr8(vcpu, sregs->cr8);
9591
9592 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
9593 kvm_x86_ops.set_efer(vcpu, sregs->efer);
9594
9595 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
9596 kvm_x86_ops.set_cr0(vcpu, sregs->cr0);
9597 vcpu->arch.cr0 = sregs->cr0;
9598
9599 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
9600 cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
9601 (X86_CR4_OSXSAVE | X86_CR4_PKE));
9602 kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
9603 if (cpuid_update_needed)
9604 kvm_update_cpuid_runtime(vcpu);
9605
9606 idx = srcu_read_lock(&vcpu->kvm->srcu);
9607 if (is_pae_paging(vcpu)) {
9608 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
9609 mmu_reset_needed = 1;
9610 }
9611 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9612
9613 if (mmu_reset_needed)
9614 kvm_mmu_reset_context(vcpu);
9615
9616 max_bits = KVM_NR_INTERRUPTS;
9617 pending_vec = find_first_bit(
9618 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
9619 if (pending_vec < max_bits) {
9620 kvm_queue_interrupt(vcpu, pending_vec, false);
9621 pr_debug("Set back pending irq %d\n", pending_vec);
9622 }
9623
9624 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
9625 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
9626 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
9627 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
9628 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
9629 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
9630
9631 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
9632 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
9633
9634 update_cr8_intercept(vcpu);
9635
9636
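/* Older userspace won't unhalt the vcpu on reset. */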
9637 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
9638 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
9639 !is_protmode(vcpu))
9640 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
9641
9642 kvm_make_request(KVM_REQ_EVENT, vcpu);
9643
9644 ret = 0;
9645out:
9646 return ret;
9647}
9648
9649int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9650 struct kvm_sregs *sregs)
9651{
9652 int ret;
9653
9654 vcpu_load(vcpu);
9655 ret = __set_sregs(vcpu, sregs);
9656 vcpu_put(vcpu);
9657 return ret;
9658}
9659
9660int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
9661 struct kvm_guest_debug *dbg)
9662{
9663 unsigned long rflags;
9664 int i, r;
9665
9666 vcpu_load(vcpu);
9667
9668 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
9669 r = -EBUSY;
9670 if (vcpu->arch.exception.pending)
9671 goto out;
9672 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
9673 kvm_queue_exception(vcpu, DB_VECTOR);
9674 else
9675 kvm_queue_exception(vcpu, BP_VECTOR);
9676 }
9677
9678
9679
9680
9681
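/*
 * Read rflags while the previous guest_debug settings still apply, so a
 * trace flag injected for single-stepping is filtered out consistently.
 */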
9682 rflags = kvm_get_rflags(vcpu);
9683
9684 vcpu->guest_debug = dbg->control;
9685 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
9686 vcpu->guest_debug = 0;
9687
9688 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
9689 for (i = 0; i < KVM_NR_DB_REGS; ++i)
9690 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
9691 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
9692 } else {
9693 for (i = 0; i < KVM_NR_DB_REGS; i++)
9694 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
9695 }
9696 kvm_update_dr7(vcpu);
9697
9698 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
9699 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
9700 get_segment_base(vcpu, VCPU_SREG_CS);
9701
9702
9703
9704
9705
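/* Trigger an rflags update that will inject or remove the trace flags. */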
9706 kvm_set_rflags(vcpu, rflags);
9707
9708 kvm_x86_ops.update_exception_bitmap(vcpu);
9709
9710 r = 0;
9711
9712out:
9713 vcpu_put(vcpu);
9714 return r;
9715}
9716
9717
9718
9719
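/* Translate a guest virtual address to a guest physical address. */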
9720int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
9721 struct kvm_translation *tr)
9722{
9723 unsigned long vaddr = tr->linear_address;
9724 gpa_t gpa;
9725 int idx;
9726
9727 vcpu_load(vcpu);
9728
9729 idx = srcu_read_lock(&vcpu->kvm->srcu);
9730 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
9731 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9732 tr->physical_address = gpa;
9733 tr->valid = gpa != UNMAPPED_GVA;
9734 tr->writeable = 1;
9735 tr->usermode = 0;
9736
9737 vcpu_put(vcpu);
9738 return 0;
9739}
9740
9741int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9742{
9743 struct fxregs_state *fxsave;
9744
9745 vcpu_load(vcpu);
9746
9747 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
9748 memcpy(fpu->fpr, fxsave->st_space, 128);
9749 fpu->fcw = fxsave->cwd;
9750 fpu->fsw = fxsave->swd;
9751 fpu->ftwx = fxsave->twd;
9752 fpu->last_opcode = fxsave->fop;
9753 fpu->last_ip = fxsave->rip;
9754 fpu->last_dp = fxsave->rdp;
9755 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
9756
9757 vcpu_put(vcpu);
9758 return 0;
9759}
9760
9761int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9762{
9763 struct fxregs_state *fxsave;
9764
9765 vcpu_load(vcpu);
9766
9767 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
9768
9769 memcpy(fxsave->st_space, fpu->fpr, 128);
9770 fxsave->cwd = fpu->fcw;
9771 fxsave->swd = fpu->fsw;
9772 fxsave->twd = fpu->ftwx;
9773 fxsave->fop = fpu->last_opcode;
9774 fxsave->rip = fpu->last_ip;
9775 fxsave->rdp = fpu->last_dp;
9776 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
9777
9778 vcpu_put(vcpu);
9779 return 0;
9780}
9781
9782static void store_regs(struct kvm_vcpu *vcpu)
9783{
9784 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
9785
9786 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
9787 __get_regs(vcpu, &vcpu->run->s.regs.regs);
9788
9789 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
9790 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
9791
9792 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
9793 kvm_vcpu_ioctl_x86_get_vcpu_events(
9794 vcpu, &vcpu->run->s.regs.events);
9795}
9796
9797static int sync_regs(struct kvm_vcpu *vcpu)
9798{
9799 if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
9800 return -EINVAL;
9801
9802 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
9803 __set_regs(vcpu, &vcpu->run->s.regs.regs);
9804 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
9805 }
9806 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
9807 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
9808 return -EINVAL;
9809 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
9810 }
9811 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
9812 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
9813 vcpu, &vcpu->run->s.regs.events))
9814 return -EINVAL;
9815 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
9816 }
9817
9818 return 0;
9819}
9820
9821static void fx_init(struct kvm_vcpu *vcpu)
9822{
9823 fpstate_init(&vcpu->arch.guest_fpu->state);
9824 if (boot_cpu_has(X86_FEATURE_XSAVES))
9825 vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
9826 host_xcr0 | XSTATE_COMPACTION_ENABLED;
9827
9828
9829
9830
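/* Start with an xcr0 value the guest is always allowed to load. */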
9831 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
9832
9833 vcpu->arch.cr0 |= X86_CR0_ET;
9834}
9835
9836int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
9837{
9838 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
9839 pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
9840 "guest TSC will not be reliable\n");
9841
9842 return 0;
9843}
9844
9845int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
9846{
9847 struct page *page;
9848 int r;
9849
9850 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
9851 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
9852 else
9853 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
9854
9855 kvm_set_tsc_khz(vcpu, max_tsc_khz);
9856
9857 r = kvm_mmu_create(vcpu);
9858 if (r < 0)
9859 return r;
9860
9861 if (irqchip_in_kernel(vcpu->kvm)) {
9862 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
9863 if (r < 0)
9864 goto fail_mmu_destroy;
9865 if (kvm_apicv_activated(vcpu->kvm))
9866 vcpu->arch.apicv_active = true;
9867 } else
9868 static_key_slow_inc(&kvm_no_apic_vcpu);
9869
9870 r = -ENOMEM;
9871
9872 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
9873 if (!page)
9874 goto fail_free_lapic;
9875 vcpu->arch.pio_data = page_address(page);
9876
9877 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
9878 GFP_KERNEL_ACCOUNT);
9879 if (!vcpu->arch.mce_banks)
9880 goto fail_free_pio_data;
9881 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
9882
9883 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
9884 GFP_KERNEL_ACCOUNT))
9885 goto fail_free_mce_banks;
9886
9887 if (!alloc_emulate_ctxt(vcpu))
9888 goto free_wbinvd_dirty_mask;
9889
9890 vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
9891 GFP_KERNEL_ACCOUNT);
9892 if (!vcpu->arch.user_fpu) {
9893 pr_err("kvm: failed to allocate userspace's fpu\n");
9894 goto free_emulate_ctxt;
9895 }
9896
9897 vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
9898 GFP_KERNEL_ACCOUNT);
9899 if (!vcpu->arch.guest_fpu) {
9900 pr_err("kvm: failed to allocate vcpu's fpu\n");
9901 goto free_user_fpu;
9902 }
9903 fx_init(vcpu);
9904
9905 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
9906
9907 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
9908
9909 kvm_async_pf_hash_reset(vcpu);
9910 kvm_pmu_init(vcpu);
9911
9912 vcpu->arch.pending_external_vector = -1;
9913 vcpu->arch.preempted_in_kernel = false;
9914
9915 kvm_hv_vcpu_init(vcpu);
9916
9917 r = kvm_x86_ops.vcpu_create(vcpu);
9918 if (r)
9919 goto free_guest_fpu;
9920
9921 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
9922 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
9923 kvm_vcpu_mtrr_init(vcpu);
9924 vcpu_load(vcpu);
9925 kvm_vcpu_reset(vcpu, false);
9926 kvm_init_mmu(vcpu, false);
9927 vcpu_put(vcpu);
9928 return 0;
9929
9930free_guest_fpu:
9931 kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
9932free_user_fpu:
9933 kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
9934free_emulate_ctxt:
9935 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
9936free_wbinvd_dirty_mask:
9937 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
9938fail_free_mce_banks:
9939 kfree(vcpu->arch.mce_banks);
9940fail_free_pio_data:
9941 free_page((unsigned long)vcpu->arch.pio_data);
9942fail_free_lapic:
9943 kvm_free_lapic(vcpu);
9944fail_mmu_destroy:
9945 kvm_mmu_destroy(vcpu);
9946 return r;
9947}
9948
9949void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
9950{
9951 struct kvm *kvm = vcpu->kvm;
9952
9953 kvm_hv_vcpu_postcreate(vcpu);
9954
9955 if (mutex_lock_killable(&vcpu->mutex))
9956 return;
9957 vcpu_load(vcpu);
9958 kvm_synchronize_tsc(vcpu, 0);
9959 vcpu_put(vcpu);
9960
9961
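/* MSR_KVM_POLL_CONTROL: polling is enabled by default. */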
9962 vcpu->arch.msr_kvm_poll_control = 1;
9963
9964 mutex_unlock(&vcpu->mutex);
9965
9966 if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
9967 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
9968 KVMCLOCK_SYNC_PERIOD);
9969}
9970
9971void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
9972{
9973 struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
9974 int idx;
9975
9976 kvm_release_pfn(cache->pfn, cache->dirty, cache);
9977
9978 kvmclock_reset(vcpu);
9979
9980 kvm_x86_ops.vcpu_free(vcpu);
9981
9982 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
9983 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
9984 kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
9985 kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
9986
9987 kvm_hv_vcpu_uninit(vcpu);
9988 kvm_pmu_destroy(vcpu);
9989 kfree(vcpu->arch.mce_banks);
9990 kvm_free_lapic(vcpu);
9991 idx = srcu_read_lock(&vcpu->kvm->srcu);
9992 kvm_mmu_destroy(vcpu);
9993 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9994 free_page((unsigned long)vcpu->arch.pio_data);
9995 kvfree(vcpu->arch.cpuid_entries);
9996 if (!lapic_in_kernel(vcpu))
9997 static_key_slow_dec(&kvm_no_apic_vcpu);
9998}
9999
10000void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
10001{
10002 kvm_lapic_reset(vcpu, init_event);
10003
10004 vcpu->arch.hflags = 0;
10005
10006 vcpu->arch.smi_pending = 0;
10007 vcpu->arch.smi_count = 0;
10008 atomic_set(&vcpu->arch.nmi_queued, 0);
10009 vcpu->arch.nmi_pending = 0;
10010 vcpu->arch.nmi_injected = false;
10011 kvm_clear_interrupt_queue(vcpu);
10012 kvm_clear_exception_queue(vcpu);
10013
10014 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
10015 kvm_update_dr0123(vcpu);
10016 vcpu->arch.dr6 = DR6_INIT;
10017 vcpu->arch.dr7 = DR7_FIXED_1;
10018 kvm_update_dr7(vcpu);
10019
10020 vcpu->arch.cr2 = 0;
10021
10022 kvm_make_request(KVM_REQ_EVENT, vcpu);
10023 vcpu->arch.apf.msr_en_val = 0;
10024 vcpu->arch.apf.msr_int_val = 0;
10025 vcpu->arch.st.msr_val = 0;
10026
10027 kvmclock_reset(vcpu);
10028
10029 kvm_clear_async_pf_completion_queue(vcpu);
10030 kvm_async_pf_hash_reset(vcpu);
10031 vcpu->arch.apf.halted = false;
10032
10033 if (kvm_mpx_supported()) {
10034 void *mpx_state_buffer;
10035
10036
10037
10038
10039
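/*
 * On INIT the guest FPU may be loaded; put it before clearing the MPX
 * state in the in-memory xsave image and reload it afterwards.
 */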
10040 if (init_event)
10041 kvm_put_guest_fpu(vcpu);
10042 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
10043 XFEATURE_BNDREGS);
10044 if (mpx_state_buffer)
10045 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
10046 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
10047 XFEATURE_BNDCSR);
10048 if (mpx_state_buffer)
10049 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
10050 if (init_event)
10051 kvm_load_guest_fpu(vcpu);
10052 }
10053
10054 if (!init_event) {
10055 kvm_pmu_reset(vcpu);
10056 vcpu->arch.smbase = 0x30000;
10057
10058 vcpu->arch.msr_misc_features_enables = 0;
10059
10060 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
10061 }
10062
10063 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
10064 vcpu->arch.regs_avail = ~0;
10065 vcpu->arch.regs_dirty = ~0;
10066
10067 vcpu->arch.ia32_xss = 0;
10068
10069 kvm_x86_ops.vcpu_reset(vcpu, init_event);
10070}
10071
10072void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
10073{
10074 struct kvm_segment cs;
10075
10076 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
10077 cs.selector = vector << 8;
10078 cs.base = vector << 12;
10079 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
10080 kvm_rip_write(vcpu, 0);
10081}
10082
10083int kvm_arch_hardware_enable(void)
10084{
10085 struct kvm *kvm;
10086 struct kvm_vcpu *vcpu;
10087 int i;
10088 int ret;
10089 u64 local_tsc;
10090 u64 max_tsc = 0;
10091 bool stable, backwards_tsc = false;
10092
10093 kvm_user_return_msr_cpu_online();
10094 ret = kvm_x86_ops.hardware_enable();
10095 if (ret != 0)
10096 return ret;
10097
10098 local_tsc = rdtsc();
10099 stable = !kvm_check_tsc_unstable();
10100 list_for_each_entry(kvm, &vm_list, vm_list) {
10101 kvm_for_each_vcpu(i, vcpu, kvm) {
10102 if (!stable && vcpu->cpu == smp_processor_id())
10103 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
10104 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
10105 backwards_tsc = true;
10106 if (vcpu->arch.last_host_tsc > max_tsc)
10107 max_tsc = vcpu->arch.last_host_tsc;
10108 }
10109 }
10110 }
10111
10112
10113
10114
10115
10116
10117
10118
10119
10120
10121
10122
10123
10124
10125
10126
10127
10128
10129
10130
10131
10132
10133
10134
10135
10136
10137
10138
10139
10140
10141
10142
10143
10144
10145
10146
10147
10148
10149
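/*
 * The TSC is not preserved across host suspend/resume, so after resume
 * it can appear to have gone backwards relative to what the vCPUs last
 * observed.  Compensate by adding the observed delta to every vCPU's TSC
 * offset and forcing a masterclock update, so the guest TSC never moves
 * backwards.
 */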
10150 if (backwards_tsc) {
10151 u64 delta_cyc = max_tsc - local_tsc;
10152 list_for_each_entry(kvm, &vm_list, vm_list) {
10153 kvm->arch.backwards_tsc_observed = true;
10154 kvm_for_each_vcpu(i, vcpu, kvm) {
10155 vcpu->arch.tsc_offset_adjustment += delta_cyc;
10156 vcpu->arch.last_host_tsc = local_tsc;
10157 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
10158 }
10159
10160
10161
10162
10163
10164
10165
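/*
 * Also reset the per-VM TSC-write tracking state so that the next guest
 * TSC write is not matched against stale pre-suspend values.
 */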
10166 kvm->arch.last_tsc_nsec = 0;
10167 kvm->arch.last_tsc_write = 0;
10168 }
10169
10170 }
10171 return 0;
10172}
10173
10174void kvm_arch_hardware_disable(void)
10175{
10176 kvm_x86_ops.hardware_disable();
10177 drop_user_return_notifiers();
10178}
10179
10180int kvm_arch_hardware_setup(void *opaque)
10181{
10182 struct kvm_x86_init_ops *ops = opaque;
10183 int r;
10184
10185 rdmsrl_safe(MSR_EFER, &host_efer);
10186
10187 if (boot_cpu_has(X86_FEATURE_XSAVES))
10188 rdmsrl(MSR_IA32_XSS, host_xss);
10189
10190 r = ops->hardware_setup();
10191 if (r != 0)
10192 return r;
10193
10194 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
10195
10196 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
10197 supported_xss = 0;
10198
10199#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
10200 cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
10201#undef __kvm_cpu_cap_has
10202
10203 if (kvm_has_tsc_control) {
10204
10205
10206
10207
10208
10209
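/*
 * Clamp the maximum guest TSC frequency so that any value userspace can
 * configure still fits in a signed 32-bit integer.
 */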
10210 u64 max = min(0x7fffffffULL,
10211 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
10212 kvm_max_guest_tsc_khz = max;
10213
10214 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
10215 }
10216
10217 kvm_init_msr_list();
10218 return 0;
10219}
10220
10221void kvm_arch_hardware_unsetup(void)
10222{
10223 kvm_x86_ops.hardware_unsetup();
10224}
10225
10226int kvm_arch_check_processor_compat(void *opaque)
10227{
10228 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
10229 struct kvm_x86_init_ops *ops = opaque;
10230
10231 WARN_ON(!irqs_disabled());
10232
10233 if (__cr4_reserved_bits(cpu_has, c) !=
10234 __cr4_reserved_bits(cpu_has, &boot_cpu_data))
10235 return -EIO;
10236
10237 return ops->check_processor_compatibility();
10238}
10239
10240bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
10241{
10242 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
10243}
10244EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
10245
10246bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
10247{
10248 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
10249}
10250
10251struct static_key kvm_no_apic_vcpu __read_mostly;
10252EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
10253
10254void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
10255{
10256 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
10257
10258 vcpu->arch.l1tf_flush_l1d = true;
10259 if (pmu->version && unlikely(pmu->event_count)) {
10260 pmu->need_cleanup = true;
10261 kvm_make_request(KVM_REQ_PMU, vcpu);
10262 }
10263 kvm_x86_ops.sched_in(vcpu, cpu);
10264}
10265
10266void kvm_arch_free_vm(struct kvm *kvm)
10267{
10268 kfree(kvm->arch.hyperv.hv_pa_pg);
10269 vfree(kvm);
10270}
10271
10272
10273int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
10274{
10275 if (type)
10276 return -EINVAL;
10277
10278 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
10279 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
10280 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
10281 INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
10282 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
10283 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
10284
10285
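/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */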
10286 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
10287
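/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */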
10288 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
10289 &kvm->arch.irq_sources_bitmap);
10290
10291 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
10292 mutex_init(&kvm->arch.apic_map_lock);
10293 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
10294
10295 kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
10296 pvclock_update_vm_gtod_copy(kvm);
10297
10298 kvm->arch.guest_can_read_msr_platform_info = true;
10299
10300 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
10301 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
10302
10303 kvm_hv_init_vm(kvm);
10304 kvm_page_track_init(kvm);
10305 kvm_mmu_init_vm(kvm);
10306
10307 return kvm_x86_ops.vm_init(kvm);
10308}
10309
10310int kvm_arch_post_init_vm(struct kvm *kvm)
10311{
10312 return kvm_mmu_post_init_vm(kvm);
10313}
10314
10315static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
10316{
10317 vcpu_load(vcpu);
10318 kvm_mmu_unload(vcpu);
10319 vcpu_put(vcpu);
10320}
10321
10322static void kvm_free_vcpus(struct kvm *kvm)
10323{
10324 unsigned int i;
10325 struct kvm_vcpu *vcpu;
10326
10327
10328
10329
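/* Unpin the mmu pages first, then destroy the vCPUs. */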
10330 kvm_for_each_vcpu(i, vcpu, kvm) {
10331 kvm_clear_async_pf_completion_queue(vcpu);
10332 kvm_unload_vcpu_mmu(vcpu);
10333 }
10334 kvm_for_each_vcpu(i, vcpu, kvm)
10335 kvm_vcpu_destroy(vcpu);
10336
10337 mutex_lock(&kvm->lock);
10338 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
10339 kvm->vcpus[i] = NULL;
10340
10341 atomic_set(&kvm->online_vcpus, 0);
10342 mutex_unlock(&kvm->lock);
10343}
10344
10345void kvm_arch_sync_events(struct kvm *kvm)
10346{
10347 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
10348 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
10349 kvm_free_pit(kvm);
10350}
10351
10352int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
10353{
10354 int i, r;
10355 unsigned long hva, old_npages;
10356 struct kvm_memslots *slots = kvm_memslots(kvm);
10357 struct kvm_memory_slot *slot;
10358
10359
10360 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
10361 return -EINVAL;
10362
10363 slot = id_to_memslot(slots, id);
10364 if (size) {
10365 if (slot && slot->npages)
10366 return -EEXIST;
10367
10368
10369
10370
10371
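/*
 * MAP_SHARED to prevent internal slot pages from being moved by
 * fork()/COW.
 */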
10372 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
10373 MAP_SHARED | MAP_ANONYMOUS, 0);
10374 if (IS_ERR((void *)hva))
10375 return PTR_ERR((void *)hva);
10376 } else {
10377 if (!slot || !slot->npages)
10378 return 0;
10379
10380 old_npages = slot->npages;
10381 hva = 0;
10382 }
10383
10384 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
10385 struct kvm_userspace_memory_region m;
10386
10387 m.slot = id | (i << 16);
10388 m.flags = 0;
10389 m.guest_phys_addr = gpa;
10390 m.userspace_addr = hva;
10391 m.memory_size = size;
10392 r = __kvm_set_memory_region(kvm, &m);
10393 if (r < 0)
10394 return r;
10395 }
10396
10397 if (!size)
10398 vm_munmap(hva, old_npages * PAGE_SIZE);
10399
10400 return 0;
10401}
10402EXPORT_SYMBOL_GPL(__x86_set_memory_region);
10403
10404void kvm_arch_pre_destroy_vm(struct kvm *kvm)
10405{
10406 kvm_mmu_pre_destroy_vm(kvm);
10407}
10408
10409void kvm_arch_destroy_vm(struct kvm *kvm)
10410{
10411 u32 i;
10412
10413 if (current->mm == kvm->mm) {
10414
10415
10416
10417
10418
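/*
 * Free memory regions allocated on behalf of userspace, unless the
 * memory map has changed due to process exit or fd copying.
 */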
10419 mutex_lock(&kvm->slots_lock);
10420 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
10421 0, 0);
10422 __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
10423 0, 0);
10424 __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
10425 mutex_unlock(&kvm->slots_lock);
10426 }
10427 if (kvm_x86_ops.vm_destroy)
10428 kvm_x86_ops.vm_destroy(kvm);
10429 for (i = 0; i < kvm->arch.msr_filter.count; i++)
10430 kfree(kvm->arch.msr_filter.ranges[i].bitmap);
10431 kvm_pic_destroy(kvm);
10432 kvm_ioapic_destroy(kvm);
10433 kvm_free_vcpus(kvm);
10434 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
10435 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
10436 kvm_mmu_uninit_vm(kvm);
10437 kvm_page_track_cleanup(kvm);
10438 kvm_hv_destroy_vm(kvm);
10439}
10440
10441void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
10442{
10443 int i;
10444
10445 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
10446 kvfree(slot->arch.rmap[i]);
10447 slot->arch.rmap[i] = NULL;
10448
10449 if (i == 0)
10450 continue;
10451
10452 kvfree(slot->arch.lpage_info[i - 1]);
10453 slot->arch.lpage_info[i - 1] = NULL;
10454 }
10455
10456 kvm_page_track_free_memslot(slot);
10457}
10458
10459static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
10460 unsigned long npages)
10461{
10462 int i;
10463
10464
10465
10466
10467
10468
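/*
 * Clear out the previous array pointers for the KVM_MR_MOVE case; the
 * old arrays will be freed by __kvm_set_memory_region() once the new
 * memslot is installed.
 */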
10469 memset(&slot->arch, 0, sizeof(slot->arch));
10470
10471 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
10472 struct kvm_lpage_info *linfo;
10473 unsigned long ugfn;
10474 int lpages;
10475 int level = i + 1;
10476
10477 lpages = gfn_to_index(slot->base_gfn + npages - 1,
10478 slot->base_gfn, level) + 1;
10479
10480 slot->arch.rmap[i] =
10481 kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
10482 GFP_KERNEL_ACCOUNT);
10483 if (!slot->arch.rmap[i])
10484 goto out_free;
10485 if (i == 0)
10486 continue;
10487
10488 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
10489 if (!linfo)
10490 goto out_free;
10491
10492 slot->arch.lpage_info[i - 1] = linfo;
10493
10494 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
10495 linfo[0].disallow_lpage = 1;
10496 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
10497 linfo[lpages - 1].disallow_lpage = 1;
10498 ugfn = slot->userspace_addr >> PAGE_SHIFT;
10499
10500
10501
10502
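/*
 * If the gfn and userspace address are not aligned wrt each other,
 * disable large page support for this slot.
 */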
10503 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
10504 unsigned long j;
10505
10506 for (j = 0; j < lpages; ++j)
10507 linfo[j].disallow_lpage = 1;
10508 }
10509 }
10510
10511 if (kvm_page_track_create_memslot(slot, npages))
10512 goto out_free;
10513
10514 return 0;
10515
10516out_free:
10517 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
10518 kvfree(slot->arch.rmap[i]);
10519 slot->arch.rmap[i] = NULL;
10520 if (i == 0)
10521 continue;
10522
10523 kvfree(slot->arch.lpage_info[i - 1]);
10524 slot->arch.lpage_info[i - 1] = NULL;
10525 }
10526 return -ENOMEM;
10527}
10528
10529void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
10530{
10531 struct kvm_vcpu *vcpu;
10532 int i;
10533
10534
10535
10536
10537
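/*
 * memslots->generation has been incremented, so zap any MMIO sptes that
 * were created with the old generation number.
 */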
10538 kvm_mmu_invalidate_mmio_sptes(kvm, gen);
10539
10540
10541 kvm_for_each_vcpu(i, vcpu, kvm)
10542 kvm_vcpu_kick(vcpu);
10543}
10544
10545int kvm_arch_prepare_memory_region(struct kvm *kvm,
10546 struct kvm_memory_slot *memslot,
10547 const struct kvm_userspace_memory_region *mem,
10548 enum kvm_mr_change change)
10549{
10550 if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
10551 return kvm_alloc_memslot_metadata(memslot,
10552 mem->memory_size >> PAGE_SHIFT);
10553 return 0;
10554}
10555
10556static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
10557 struct kvm_memory_slot *old,
10558 struct kvm_memory_slot *new,
10559 enum kvm_mr_change change)
10560{
10561
10562
10563
10564
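/* Nothing to do for read-only slots or for anything but flag changes. */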
10565 if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
10566 return;
10567
10568
10569
10570
10571
10572
10573
10574
10575
10576
10577
10578
10579
10580
10581
10582
10583
10584
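/*
 * Dirty logging tracks sptes at 4k granularity, so large sptes were
 * broken up while it was enabled.  When logging is turned off again
 * (e.g. after a failed live migration), zap the leftover small sptes so
 * later faults can rebuild large mappings.
 */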
10585 if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
10586 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
10587 kvm_mmu_zap_collapsible_sptes(kvm, new);
10588
10589
10590
10591
10592
10593
10594
10595
10596
10597
10598
10599
10600
10601
10602
10603
10604
10605
10606
10607
10608
10609
10610
10611
10612
10613
10614
10615
10616
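/*
 * When dirty logging is turned on, prefer the vendor hook (e.g. PML on
 * VMX) if one is provided; otherwise fall back to write-protecting the
 * slot so that writes fault and can be logged.
 */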
10617 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
10618 if (kvm_x86_ops.slot_enable_log_dirty) {
10619 kvm_x86_ops.slot_enable_log_dirty(kvm, new);
10620 } else {
10621 int level =
10622 kvm_dirty_log_manual_protect_and_init_set(kvm) ?
10623 PG_LEVEL_2M : PG_LEVEL_4K;
10624
10625
10626
10627
10628
10629
10630
10631
10632
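/*
 * With manual dirty-log protect and "initially all set", only huge pages
 * are write-protected here; 4k pages are write-protected lazily when
 * their dirty bits are cleared by userspace.
 */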
10633 kvm_mmu_slot_remove_write_access(kvm, new, level);
10634 }
10635 } else {
10636 if (kvm_x86_ops.slot_disable_log_dirty)
10637 kvm_x86_ops.slot_disable_log_dirty(kvm, new);
10638 }
10639}
10640
10641void kvm_arch_commit_memory_region(struct kvm *kvm,
10642 const struct kvm_userspace_memory_region *mem,
10643 struct kvm_memory_slot *old,
10644 const struct kvm_memory_slot *new,
10645 enum kvm_mr_change change)
10646{
10647 if (!kvm->arch.n_requested_mmu_pages)
10648 kvm_mmu_change_mmu_pages(kvm,
10649 kvm_mmu_calculate_default_mmu_pages(kvm));
10650
10651
10652
10653
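/* FIXME: const-ify all uses of struct kvm_memory_slot to drop this cast. */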
10654 kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change);
10655
10656
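/* Free the arrays belonging to the old memslot. */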
10657 if (change == KVM_MR_MOVE)
10658 kvm_arch_free_memslot(kvm, old);
10659}
10660
10661void kvm_arch_flush_shadow_all(struct kvm *kvm)
10662{
10663 kvm_mmu_zap_all(kvm);
10664}
10665
10666void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
10667 struct kvm_memory_slot *slot)
10668{
10669 kvm_page_track_flush_slot(kvm, slot);
10670}
10671
10672static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
10673{
10674 return (is_guest_mode(vcpu) &&
10675 kvm_x86_ops.guest_apic_has_interrupt &&
10676 kvm_x86_ops.guest_apic_has_interrupt(vcpu));
10677}
10678
10679static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
10680{
10681 if (!list_empty_careful(&vcpu->async_pf.done))
10682 return true;
10683
10684 if (kvm_apic_has_events(vcpu))
10685 return true;
10686
10687 if (vcpu->arch.pv.pv_unhalted)
10688 return true;
10689
10690 if (vcpu->arch.exception.pending)
10691 return true;
10692
10693 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
10694 (vcpu->arch.nmi_pending &&
10695 kvm_x86_ops.nmi_allowed(vcpu, false)))
10696 return true;
10697
10698 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
10699 (vcpu->arch.smi_pending &&
10700 kvm_x86_ops.smi_allowed(vcpu, false)))
10701 return true;
10702
10703 if (kvm_arch_interrupt_allowed(vcpu) &&
10704 (kvm_cpu_has_interrupt(vcpu) ||
10705 kvm_guest_apic_has_interrupt(vcpu)))
10706 return true;
10707
10708 if (kvm_hv_has_stimer_pending(vcpu))
10709 return true;
10710
10711 if (is_guest_mode(vcpu) &&
10712 kvm_x86_ops.nested_ops->hv_timer_pending &&
10713 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
10714 return true;
10715
10716 return false;
10717}
10718
10719int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
10720{
10721 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
10722}
10723
10724bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
10725{
10726 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
10727 return true;
10728
10729 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
10730 kvm_test_request(KVM_REQ_SMI, vcpu) ||
10731 kvm_test_request(KVM_REQ_EVENT, vcpu))
10732 return true;
10733
10734 if (vcpu->arch.apicv_active && kvm_x86_ops.dy_apicv_has_pending_interrupt(vcpu))
10735 return true;
10736
10737 return false;
10738}
10739
10740bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
10741{
10742 return vcpu->arch.preempted_in_kernel;
10743}
10744
10745int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
10746{
10747 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
10748}
10749
10750int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
10751{
10752 return kvm_x86_ops.interrupt_allowed(vcpu, false);
10753}
10754
10755unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
10756{
10757 if (is_64_bit_mode(vcpu))
10758 return kvm_rip_read(vcpu);
10759 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
10760 kvm_rip_read(vcpu));
10761}
10762EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
10763
10764bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
10765{
10766 return kvm_get_linear_rip(vcpu) == linear_rip;
10767}
10768EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
10769
10770unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
10771{
10772 unsigned long rflags;
10773
10774 rflags = kvm_x86_ops.get_rflags(vcpu);
10775 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10776 rflags &= ~X86_EFLAGS_TF;
10777 return rflags;
10778}
10779EXPORT_SYMBOL_GPL(kvm_get_rflags);
10780
10781static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
10782{
10783 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
10784 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
10785 rflags |= X86_EFLAGS_TF;
10786 kvm_x86_ops.set_rflags(vcpu, rflags);
10787}
10788
10789void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
10790{
10791 __kvm_set_rflags(vcpu, rflags);
10792 kvm_make_request(KVM_REQ_EVENT, vcpu);
10793}
10794EXPORT_SYMBOL_GPL(kvm_set_rflags);
10795
10796void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
10797{
10798 int r;
10799
10800 if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
10801 work->wakeup_all)
10802 return;
10803
10804 r = kvm_mmu_reload(vcpu);
10805 if (unlikely(r))
10806 return;
10807
10808 if (!vcpu->arch.mmu->direct_map &&
10809 work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
10810 return;
10811
10812 kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
10813}
10814
10815static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
10816{
10817 BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
10818
10819 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
10820}
10821
10822static inline u32 kvm_async_pf_next_probe(u32 key)
10823{
10824 return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
10825}
10826
10827static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10828{
10829 u32 key = kvm_async_pf_hash_fn(gfn);
10830
10831 while (vcpu->arch.apf.gfns[key] != ~0)
10832 key = kvm_async_pf_next_probe(key);
10833
10834 vcpu->arch.apf.gfns[key] = gfn;
10835}
10836
10837static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
10838{
10839 int i;
10840 u32 key = kvm_async_pf_hash_fn(gfn);
10841
10842 for (i = 0; i < ASYNC_PF_PER_VCPU &&
10843 (vcpu->arch.apf.gfns[key] != gfn &&
10844 vcpu->arch.apf.gfns[key] != ~0); i++)
10845 key = kvm_async_pf_next_probe(key);
10846
10847 return key;
10848}
10849
10850bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10851{
10852 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
10853}
10854
10855static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10856{
10857 u32 i, j, k;
10858
10859 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
10860
10861 if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
10862 return;
10863
10864 while (true) {
10865 vcpu->arch.apf.gfns[i] = ~0;
10866 do {
10867 j = kvm_async_pf_next_probe(j);
10868 if (vcpu->arch.apf.gfns[j] == ~0)
10869 return;
10870 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
10871
10872
10873
10874
10875
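/*
 * k is the natural slot of gfns[j].  Keep probing while k lies
 * cyclically in (i, j]; only an entry whose natural slot falls outside
 * that range may be moved back into slot i without breaking lookups.
 */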
10876 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
10877 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
10878 i = j;
10879 }
10880}
10881
10882static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
10883{
10884 u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
10885
10886 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
10887 sizeof(reason));
10888}
10889
10890static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
10891{
10892 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
10893
10894 return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
10895 &token, offset, sizeof(token));
10896}
10897
10898static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
10899{
10900 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
10901 u32 val;
10902
10903 if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
10904 &val, offset, sizeof(val)))
10905 return false;
10906
10907 return !val;
10908}
10909
10910static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
10911{
10912 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
10913 return false;
10914
10915 if (!kvm_pv_async_pf_enabled(vcpu) ||
10916 (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
10917 return false;
10918
10919 return true;
10920}
10921
10922bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
10923{
10924 if (unlikely(!lapic_in_kernel(vcpu) ||
10925 kvm_event_needs_reinjection(vcpu) ||
10926 vcpu->arch.exception.pending))
10927 return false;
10928
10929 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
10930 return false;
10931
10932
10933
10934
10935
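/* If interrupts are off we cannot even use an artificial halt state. */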
10936 return kvm_arch_interrupt_allowed(vcpu);
10937}
10938
10939bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
10940 struct kvm_async_pf *work)
10941{
10942 struct x86_exception fault;
10943
10944 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
10945 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
10946
10947 if (kvm_can_deliver_async_pf(vcpu) &&
10948 !apf_put_user_notpresent(vcpu)) {
10949 fault.vector = PF_VECTOR;
10950 fault.error_code_valid = true;
10951 fault.error_code = 0;
10952 fault.nested_page_fault = false;
10953 fault.address = work->arch.token;
10954 fault.async_page_fault = true;
10955 kvm_inject_page_fault(vcpu, &fault);
10956 return true;
10957 } else {
10958
10959
10960
10961
10962
10963
10964
10965
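/*
 * The async #PF cannot be delivered as a paravirtual event right now, so
 * fall back to an artificial halt: the vCPU still responds to interrupts
 * and the faulting instruction is retried once the page is ready.
 */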
10966 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
10967 return false;
10968 }
10969}
10970
10971void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
10972 struct kvm_async_pf *work)
10973{
10974 struct kvm_lapic_irq irq = {
10975 .delivery_mode = APIC_DM_FIXED,
10976 .vector = vcpu->arch.apf.vec
10977 };
10978
10979 if (work->wakeup_all)
10980 work->arch.token = ~0;
10981 else
10982 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
10983 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
10984
10985 if ((work->wakeup_all || work->notpresent_injected) &&
10986 kvm_pv_async_pf_enabled(vcpu) &&
10987 !apf_put_user_ready(vcpu, work->arch.token)) {
10988 vcpu->arch.apf.pageready_pending = true;
10989 kvm_apic_set_irq(vcpu, &irq, NULL);
10990 }
10991
10992 vcpu->arch.apf.halted = false;
10993 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10994}
10995
10996void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
10997{
10998 kvm_make_request(KVM_REQ_APF_READY, vcpu);
10999 if (!vcpu->arch.apf.pageready_pending)
11000 kvm_vcpu_kick(vcpu);
11001}
11002
11003bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
11004{
11005 if (!kvm_pv_async_pf_enabled(vcpu))
11006 return true;
11007 else
11008 return apf_pageready_slot_free(vcpu);
11009}
11010
void kvm_arch_start_assignment(struct kvm *kvm)
{
	atomic_inc(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);

void kvm_arch_end_assignment(struct kvm *kvm)
{
	atomic_dec(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);

bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);

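/*
 * Same pattern for devices doing non-coherent DMA: a plain reference count
 * that kvm_arch_has_noncoherent_dma() turns into a boolean.
 */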
void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
	atomic_inc(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);

void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
	atomic_dec(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);

bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);

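/*
 * IRQ bypass (posted interrupt) plumbing: when an irqfd gains or loses a
 * producer (e.g. a VFIO device interrupt), update the posted-interrupt IRTE
 * so the device can inject directly into the guest, and keep the
 * assigned-device count in sync.
 */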
bool kvm_arch_has_irq_bypass(void)
{
	return true;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);
	int ret;

	irqfd->producer = prod;
	kvm_arch_start_assignment(irqfd->kvm);
	ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
					 prod->irq, irqfd->gsi, 1);

	if (ret)
		kvm_arch_end_assignment(irqfd->kvm);

	return ret;
}

void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	int ret;
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	WARN_ON(irqfd->producer != prod);
	irqfd->producer = NULL;

	/*
	 * When the producer of a consumer is unregistered, we change back to
	 * remapped mode, so we can re-use the current implementation when the
	 * irq is masked/disabled or the consumer side (KVM in this case)
	 * doesn't want to receive the interrupts.
	 */
	ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
	if (ret)
		printk(KERN_INFO "irq bypass consumer (token %p) unregistration fails: %d\n",
		       irqfd->consumer.token, ret);

	kvm_arch_end_assignment(irqfd->kvm);
}

int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
				  uint32_t guest_irq, bool set)
{
	return kvm_x86_ops.update_pi_irte(kvm, host_irq, guest_irq, set);
}

bool kvm_vector_hashing_enabled(void)
{
	return vector_hashing;
}

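/*
 * Halt polling is suppressed when the guest has cleared the enable bit
 * (bit 0) in MSR_KVM_POLL_CONTROL.
 */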
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);

int kvm_spec_ctrl_test_value(u64 value)
{
	/*
	 * Test that setting IA32_SPEC_CTRL to the given value
	 * is allowed by the host processor.
	 */

	u64 saved_value;
	unsigned long flags;
	int ret = 0;

	local_irq_save(flags);

	if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
		ret = 1;
	else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
		ret = 1;
	else
		wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);

	local_irq_restore(flags);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);

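/*
 * Inject a #PF for a guest virtual address that faulted during emulation.
 * If the supplied error code is consistent (non-present fault, or the
 * address still translates), it is used as-is; otherwise the fault produced
 * by walking the guest page tables is injected instead.
 */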
void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
{
	struct x86_exception fault;
	u32 access = error_code &
		(PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);

	if (!(error_code & PFERR_PRESENT_MASK) ||
	    vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
		/*
		 * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
		 * tables probably do not match the TLB.  Just proceed
		 * with the error code that the processor gave.
		 */
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = error_code;
		fault.nested_page_fault = false;
		fault.address = gva;
	}
	vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
}
EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);

/*
 * Handles kvm_read/write_guest_virt*() results: a #PF is injected if the
 * access faulted in the guest, otherwise an emulation error is reported to
 * userspace.  Returns 1 if the vCPU should keep running, 0 if it should
 * exit to userspace.
 */
int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
			      struct x86_exception *e)
{
	if (r == X86EMUL_PROPAGATE_FAULT) {
		kvm_inject_emulated_page_fault(vcpu, e);
		return 1;
	}

	/*
	 * In case kvm_read/write_guest_virt*() failed with X86EMUL_IO_NEEDED
	 * while handling a VMX instruction, KVM could have handled the request
	 * correctly by exiting to userspace and performing I/O, but there
	 * doesn't seem to be a real use-case behind such requests, so just
	 * return KVM_EXIT_INTERNAL_ERROR for now.
	 */
	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
	vcpu->run->internal.ndata = 0;

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_handle_memory_failure);

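/*
 * Common INVPCID emulation for both VMX and SVM: read the descriptor from
 * guest memory, validate it, and invalidate the affected guest mappings
 * according to the INVPCID type.
 */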
int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
{
	bool pcid_enabled;
	struct x86_exception e;
	unsigned i;
	unsigned long roots_to_free = 0;
	struct {
		u64 pcid;
		u64 gla;
	} operand;
	int r;

	r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
	if (r != X86EMUL_CONTINUE)
		return kvm_handle_memory_failure(vcpu, r, &e);

	if (operand.pcid >> 12 != 0) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);

	switch (type) {
	case INVPCID_TYPE_INDIV_ADDR:
		if ((!pcid_enabled && (operand.pcid != 0)) ||
		    is_noncanonical_address(operand.gla, vcpu)) {
			kvm_inject_gp(vcpu, 0);
			return 1;
		}
		kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
		return kvm_skip_emulated_instruction(vcpu);

	case INVPCID_TYPE_SINGLE_CTXT:
		if (!pcid_enabled && (operand.pcid != 0)) {
			kvm_inject_gp(vcpu, 0);
			return 1;
		}

		if (kvm_get_active_pcid(vcpu) == operand.pcid) {
			kvm_mmu_sync_roots(vcpu);
			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
		}

		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
			if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
			    == operand.pcid)
				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);

		kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
		/*
		 * If neither the current cr3 nor any of the prev_roots use the
		 * given PCID, then nothing needs to be done here because a
		 * resync will happen anyway before switching to any other CR3.
		 */

		return kvm_skip_emulated_instruction(vcpu);

	case INVPCID_TYPE_ALL_NON_GLOBAL:
		/*
		 * Currently, KVM doesn't mark global entries in the shadow
		 * page tables, so a non-global flush just degenerates to a
		 * global flush.  If needed, this could be optimized later by
		 * keeping track of global entries in shadow page tables.
		 */

		fallthrough;
	case INVPCID_TYPE_ALL_INCL_GLOBAL:
		kvm_mmu_unload(vcpu);
		return kvm_skip_emulated_instruction(vcpu);

	default:
		BUG();
	}
}
EXPORT_SYMBOL_GPL(kvm_handle_invpcid);

EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);