19#include <linux/kvm_host.h>
20#include "irq.h"
21#include "ioapic.h"
22#include "mmu.h"
23#include "i8254.h"
24#include "tss.h"
25#include "kvm_cache_regs.h"
26#include "kvm_emulate.h"
27#include "x86.h"
28#include "cpuid.h"
29#include "pmu.h"
30#include "hyperv.h"
31#include "lapic.h"
32
33#include <linux/clocksource.h>
34#include <linux/interrupt.h>
35#include <linux/kvm.h>
36#include <linux/fs.h>
37#include <linux/vmalloc.h>
38#include <linux/export.h>
39#include <linux/moduleparam.h>
40#include <linux/mman.h>
41#include <linux/highmem.h>
42#include <linux/iommu.h>
43#include <linux/intel-iommu.h>
44#include <linux/cpufreq.h>
45#include <linux/user-return-notifier.h>
46#include <linux/srcu.h>
47#include <linux/slab.h>
48#include <linux/perf_event.h>
49#include <linux/uaccess.h>
50#include <linux/hash.h>
51#include <linux/pci.h>
52#include <linux/timekeeper_internal.h>
53#include <linux/pvclock_gtod.h>
54#include <linux/kvm_irqfd.h>
55#include <linux/irqbypass.h>
56#include <linux/sched/stat.h>
57#include <linux/sched/isolation.h>
58#include <linux/mem_encrypt.h>
59#include <linux/entry-kvm.h>
60
61#include <trace/events/kvm.h>
62
63#include <asm/debugreg.h>
64#include <asm/msr.h>
65#include <asm/desc.h>
66#include <asm/mce.h>
67#include <linux/kernel_stat.h>
68#include <asm/fpu/internal.h>
69#include <asm/pvclock.h>
70#include <asm/div64.h>
71#include <asm/irq_remapping.h>
72#include <asm/mshyperv.h>
73#include <asm/hypervisor.h>
74#include <asm/intel_pt.h>
75#include <asm/emulate_prefix.h>
76#include <clocksource/hyperv_timer.h>
77
78#define CREATE_TRACE_POINTS
79#include "trace.h"
80
81#define MAX_IO_MSRS 256
82#define KVM_MAX_MCE_BANKS 32
83u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
84EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
85
86#define emul_to_vcpu(ctxt) \
87 ((struct kvm_vcpu *)(ctxt)->vcpu)
88
/*
 * EFER defaults: the SCE bit is always writable; on 64-bit builds LME and
 * LMA are writable as well.  All other EFER bits start out reserved and are
 * only unlocked via kvm_enable_efer_bits().
 */
93#ifdef CONFIG_X86_64
94static
95u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
96#else
97static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
98#endif
99
100static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
101
102#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
103 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
104
105static void update_cr8_intercept(struct kvm_vcpu *vcpu);
106static void process_nmi(struct kvm_vcpu *vcpu);
107static void enter_smm(struct kvm_vcpu *vcpu);
108static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
109static void store_regs(struct kvm_vcpu *vcpu);
110static int sync_regs(struct kvm_vcpu *vcpu);
111
112struct kvm_x86_ops kvm_x86_ops __read_mostly;
113EXPORT_SYMBOL_GPL(kvm_x86_ops);
114
115static bool __read_mostly ignore_msrs = 0;
116module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
117
118static bool __read_mostly report_ignored_msrs = true;
119module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
120
121unsigned int min_timer_period_us = 200;
122module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
123
124static bool __read_mostly kvmclock_periodic_sync = true;
125module_param(kvmclock_periodic_sync, bool, S_IRUGO);
126
127bool __read_mostly kvm_has_tsc_control;
128EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
129u32 __read_mostly kvm_max_guest_tsc_khz;
130EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
131u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
132EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
133u64 __read_mostly kvm_max_tsc_scaling_ratio;
134EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
135u64 __read_mostly kvm_default_tsc_scaling_ratio;
136EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
137
138
139static u32 __read_mostly tsc_tolerance_ppm = 250;
140module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
141
/*
 * Advance the programmed expiry of the lapic timer (tscdeadline mode) by
 * this many nanoseconds to hide exit/entry latency; -1 selects automatic,
 * adaptive tuning.
 */
148static int __read_mostly lapic_timer_advance_ns = -1;
149module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
150
151static bool __read_mostly vector_hashing = true;
152module_param(vector_hashing, bool, S_IRUGO);
153
154bool __read_mostly enable_vmware_backdoor = false;
155module_param(enable_vmware_backdoor, bool, S_IRUGO);
156EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
157
158static bool __read_mostly force_emulation_prefix = false;
159module_param(force_emulation_prefix, bool, S_IRUGO);
160
161int __read_mostly pi_inject_timer = -1;
162module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
163
164#define KVM_NR_SHARED_MSRS 16
165
166struct kvm_shared_msrs_global {
167 int nr;
168 u32 msrs[KVM_NR_SHARED_MSRS];
169};
170
171struct kvm_shared_msrs {
172 struct user_return_notifier urn;
173 bool registered;
174 struct kvm_shared_msr_values {
175 u64 host;
176 u64 curr;
177 } values[KVM_NR_SHARED_MSRS];
178};
179
180static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
181static struct kvm_shared_msrs __percpu *shared_msrs;
182
183#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
184 | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
185 | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
186 | XFEATURE_MASK_PKRU)
187
188u64 __read_mostly host_efer;
189EXPORT_SYMBOL_GPL(host_efer);
190
191bool __read_mostly allow_smaller_maxphyaddr = 0;
192EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
193
194static u64 __read_mostly host_xss;
195u64 __read_mostly supported_xss;
196EXPORT_SYMBOL_GPL(supported_xss);
197
198struct kvm_stats_debugfs_item debugfs_entries[] = {
199 VCPU_STAT("pf_fixed", pf_fixed),
200 VCPU_STAT("pf_guest", pf_guest),
201 VCPU_STAT("tlb_flush", tlb_flush),
202 VCPU_STAT("invlpg", invlpg),
203 VCPU_STAT("exits", exits),
204 VCPU_STAT("io_exits", io_exits),
205 VCPU_STAT("mmio_exits", mmio_exits),
206 VCPU_STAT("signal_exits", signal_exits),
207 VCPU_STAT("irq_window", irq_window_exits),
208 VCPU_STAT("nmi_window", nmi_window_exits),
209 VCPU_STAT("halt_exits", halt_exits),
210 VCPU_STAT("halt_successful_poll", halt_successful_poll),
211 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
212 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
213 VCPU_STAT("halt_wakeup", halt_wakeup),
214 VCPU_STAT("hypercalls", hypercalls),
215 VCPU_STAT("request_irq", request_irq_exits),
216 VCPU_STAT("irq_exits", irq_exits),
217 VCPU_STAT("host_state_reload", host_state_reload),
218 VCPU_STAT("fpu_reload", fpu_reload),
219 VCPU_STAT("insn_emulation", insn_emulation),
220 VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
221 VCPU_STAT("irq_injections", irq_injections),
222 VCPU_STAT("nmi_injections", nmi_injections),
223 VCPU_STAT("req_event", req_event),
224 VCPU_STAT("l1d_flush", l1d_flush),
225 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
226 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
227 VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
228 VM_STAT("mmu_pte_write", mmu_pte_write),
229 VM_STAT("mmu_pte_updated", mmu_pte_updated),
230 VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
231 VM_STAT("mmu_flooded", mmu_flooded),
232 VM_STAT("mmu_recycled", mmu_recycled),
233 VM_STAT("mmu_cache_miss", mmu_cache_miss),
234 VM_STAT("mmu_unsync", mmu_unsync),
235 VM_STAT("remote_tlb_flush", remote_tlb_flush),
236 VM_STAT("largepages", lpages, .mode = 0444),
237 VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
238 VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
239 { NULL }
240};
241
242u64 __read_mostly host_xcr0;
243u64 __read_mostly supported_xcr0;
244EXPORT_SYMBOL_GPL(supported_xcr0);
245
246static struct kmem_cache *x86_fpu_cache;
247
248static struct kmem_cache *x86_emulator_cache;
249
/*
 * Called when a get/set of an MSR hits an unknown or otherwise invalid MSR
 * index.  Returns 0 if the access should be silently ignored (per the
 * ignore_msrs module parameter), or 1 if the error should be forwarded to
 * the guest.
 */
255static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
256 u64 data, bool write)
257{
258 const char *op = write ? "wrmsr" : "rdmsr";
259
260 if (ignore_msrs) {
261 if (report_ignored_msrs)
262 vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n",
263 op, msr, data);
264
265 return 0;
266 } else {
267 vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n",
268 op, msr, data);
269 return 1;
270 }
271}
272
273static struct kmem_cache *kvm_alloc_emulator_cache(void)
274{
275 unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src);
276 unsigned int size = sizeof(struct x86_emulate_ctxt);
277
278 return kmem_cache_create_usercopy("x86_emulator", size,
279 __alignof__(struct x86_emulate_ctxt),
280 SLAB_ACCOUNT, useroffset,
281 size - useroffset, NULL);
282}
283
284static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
285
286static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
287{
288 int i;
289 for (i = 0; i < ASYNC_PF_PER_VCPU; i++)
290 vcpu->arch.apf.gfns[i] = ~0;
291}
292
293static void kvm_on_user_return(struct user_return_notifier *urn)
294{
295 unsigned slot;
296 struct kvm_shared_msrs *locals
297 = container_of(urn, struct kvm_shared_msrs, urn);
298 struct kvm_shared_msr_values *values;
299 unsigned long flags;
300
	/*
	 * Disable irqs here: the code below can also run when a CPU is being
	 * disabled, and the unregister/restore must not be interleaved with
	 * that path.
	 */
305 local_irq_save(flags);
306 if (locals->registered) {
307 locals->registered = false;
308 user_return_notifier_unregister(urn);
309 }
310 local_irq_restore(flags);
311 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
312 values = &locals->values[slot];
313 if (values->host != values->curr) {
314 wrmsrl(shared_msrs_global.msrs[slot], values->host);
315 values->curr = values->host;
316 }
317 }
318}
319
320void kvm_define_shared_msr(unsigned slot, u32 msr)
321{
322 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
323 shared_msrs_global.msrs[slot] = msr;
324 if (slot >= shared_msrs_global.nr)
325 shared_msrs_global.nr = slot + 1;
326}
327EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
328
329static void kvm_shared_msr_cpu_online(void)
330{
331 unsigned int cpu = smp_processor_id();
332 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
333 u64 value;
334 int i;
335
336 for (i = 0; i < shared_msrs_global.nr; ++i) {
337 rdmsrl_safe(shared_msrs_global.msrs[i], &value);
338 smsr->values[i].host = value;
339 smsr->values[i].curr = value;
340 }
341}
342
343int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
344{
345 unsigned int cpu = smp_processor_id();
346 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
347 int err;
348
349 value = (value & mask) | (smsr->values[slot].host & ~mask);
350 if (value == smsr->values[slot].curr)
351 return 0;
352 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
353 if (err)
354 return 1;
355
356 smsr->values[slot].curr = value;
357 if (!smsr->registered) {
358 smsr->urn.on_user_return = kvm_on_user_return;
359 user_return_notifier_register(&smsr->urn);
360 smsr->registered = true;
361 }
362 return 0;
363}
364EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
365
366static void drop_user_return_notifiers(void)
367{
368 unsigned int cpu = smp_processor_id();
369 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
370
371 if (smsr->registered)
372 kvm_on_user_return(&smsr->urn);
373}
374
375u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
376{
377 return vcpu->arch.apic_base;
378}
379EXPORT_SYMBOL_GPL(kvm_get_apic_base);
380
381enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
382{
383 return kvm_apic_mode(kvm_get_apic_base(vcpu));
384}
385EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
386
387int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
388{
389 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
390 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
391 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
392 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
393
394 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
395 return 1;
396 if (!msr_info->host_initiated) {
397 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
398 return 1;
399 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
400 return 1;
401 }
402
403 kvm_lapic_set_base(vcpu, msr_info->data);
404 kvm_recalculate_apic_map(vcpu->kvm);
405 return 0;
406}
407EXPORT_SYMBOL_GPL(kvm_set_apic_base);
408
409asmlinkage __visible noinstr void kvm_spurious_fault(void)
410{
411
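	/* Faulting while not rebooting is a real bug; BUG_ON() keeps the trace. */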
412 BUG_ON(!kvm_rebooting);
413}
414EXPORT_SYMBOL_GPL(kvm_spurious_fault);
415
416#define EXCPT_BENIGN 0
417#define EXCPT_CONTRIBUTORY 1
418#define EXCPT_PF 2
419
420static int exception_class(int vector)
421{
422 switch (vector) {
423 case PF_VECTOR:
424 return EXCPT_PF;
425 case DE_VECTOR:
426 case TS_VECTOR:
427 case NP_VECTOR:
428 case SS_VECTOR:
429 case GP_VECTOR:
430 return EXCPT_CONTRIBUTORY;
431 default:
432 break;
433 }
434 return EXCPT_BENIGN;
435}
436
437#define EXCPT_FAULT 0
438#define EXCPT_TRAP 1
439#define EXCPT_ABORT 2
440#define EXCPT_INTERRUPT 3
441
442static int exception_type(int vector)
443{
444 unsigned int mask;
445
446 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
447 return EXCPT_INTERRUPT;
448
449 mask = 1 << vector;
450
451
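	/* #DB, #BP and #OF are delivered as traps. */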
452 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
453 return EXCPT_TRAP;
454
455 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
456 return EXCPT_ABORT;
457
458
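	/* Everything else, including reserved vectors, is delivered as a fault. */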
459 return EXCPT_FAULT;
460}
461
462void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
463{
464 unsigned nr = vcpu->arch.exception.nr;
465 bool has_payload = vcpu->arch.exception.has_payload;
466 unsigned long payload = vcpu->arch.exception.payload;
467
468 if (!has_payload)
469 return;
470
471 switch (nr) {
472 case DB_VECTOR:
		/*
		 * Certain debug exceptions may clear bits 0-3 of DR6; the
		 * remaining contents of DR6 are never cleared by the
		 * processor.  Clear the trap bits and then merge in the
		 * payload.
		 */
478 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
479
480
481
482 vcpu->arch.dr6 |= DR6_RTM;
483 vcpu->arch.dr6 |= payload;
484
		/*
		 * The #DB payload follows the layout of the VMX "pending
		 * debug exceptions" field: bit 16 is set in the payload
		 * whenever the exception should clear DR6.RTM, hence the
		 * XOR with DR6_RTM below.
		 */
492 vcpu->arch.dr6 ^= payload & DR6_RTM;
493
		/*
		 * Bit 12 (enabled breakpoint) is defined in the "pending
		 * debug exceptions" format but not in DR6, so strip it from
		 * the merged value.
		 */
500 vcpu->arch.dr6 &= ~BIT(12);
501 break;
502 case PF_VECTOR:
503 vcpu->arch.cr2 = payload;
504 break;
505 }
506
507 vcpu->arch.exception.has_payload = false;
508 vcpu->arch.exception.payload = 0;
509}
510EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
511
512static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
513 unsigned nr, bool has_error, u32 error_code,
514 bool has_payload, unsigned long payload, bool reinject)
515{
516 u32 prev_nr;
517 int class1, class2;
518
519 kvm_make_request(KVM_REQ_EVENT, vcpu);
520
521 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
522 queue:
523 if (has_error && !is_protmode(vcpu))
524 has_error = false;
525 if (reinject) {
			/*
			 * On vmentry, vcpu->arch.exception.pending is only
			 * true if an event injection was blocked by
			 * nested_run_pending.  In that case an immediate exit
			 * is requested, so the guest should not get far
			 * enough to need reinjection.
			 */
534 WARN_ON_ONCE(vcpu->arch.exception.pending);
535 vcpu->arch.exception.injected = true;
536 if (WARN_ON_ONCE(has_payload)) {
				/*
				 * A reinjected exception has already
				 * delivered its payload, so drop it here.
				 */
541 has_payload = false;
542 payload = 0;
543 }
544 } else {
545 vcpu->arch.exception.pending = true;
546 vcpu->arch.exception.injected = false;
547 }
548 vcpu->arch.exception.has_error_code = has_error;
549 vcpu->arch.exception.nr = nr;
550 vcpu->arch.exception.error_code = error_code;
551 vcpu->arch.exception.has_payload = has_payload;
552 vcpu->arch.exception.payload = payload;
553 if (!is_guest_mode(vcpu))
554 kvm_deliver_exception_payload(vcpu);
555 return;
556 }
557
558
559 prev_nr = vcpu->arch.exception.nr;
560 if (prev_nr == DF_VECTOR) {
561
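		/* A fault while delivering #DF is a triple fault -> shutdown. */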
562 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
563 return;
564 }
565 class1 = exception_class(prev_nr);
566 class2 = exception_class(nr);
567 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
568 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/*
		 * Two contributory exceptions (or a #PF followed by a
		 * non-benign exception) escalate to a double fault.  Leave
		 * it pending so a nested vmexit can still be triggered.
		 */
574 vcpu->arch.exception.pending = true;
575 vcpu->arch.exception.injected = false;
576 vcpu->arch.exception.has_error_code = true;
577 vcpu->arch.exception.nr = DF_VECTOR;
578 vcpu->arch.exception.error_code = 0;
579 vcpu->arch.exception.has_payload = false;
580 vcpu->arch.exception.payload = 0;
581 } else
		/*
		 * Otherwise replace the previous exception with the new one,
		 * in the hope that re-executing the instruction will
		 * regenerate the lost exception.
		 */
585 goto queue;
586}
587
588void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
589{
590 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
591}
592EXPORT_SYMBOL_GPL(kvm_queue_exception);
593
594void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
595{
596 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
597}
598EXPORT_SYMBOL_GPL(kvm_requeue_exception);
599
600void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
601 unsigned long payload)
602{
603 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
604}
605EXPORT_SYMBOL_GPL(kvm_queue_exception_p);
606
607static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
608 u32 error_code, unsigned long payload)
609{
610 kvm_multiple_exception(vcpu, nr, true, error_code,
611 true, payload, false);
612}
613
614int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
615{
616 if (err)
617 kvm_inject_gp(vcpu, 0);
618 else
619 return kvm_skip_emulated_instruction(vcpu);
620
621 return 1;
622}
623EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
624
625void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
626{
627 ++vcpu->stat.pf_guest;
628 vcpu->arch.exception.nested_apf =
629 is_guest_mode(vcpu) && fault->async_page_fault;
630 if (vcpu->arch.exception.nested_apf) {
631 vcpu->arch.apf.nested_apf_token = fault->address;
632 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
633 } else {
634 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
635 fault->address);
636 }
637}
638EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
639
640bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
641 struct x86_exception *fault)
642{
643 struct kvm_mmu *fault_mmu;
644 WARN_ON_ONCE(fault->vector != PF_VECTOR);
645
646 fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu :
647 vcpu->arch.walk_mmu;
648
	/*
	 * Invalidate the TLB entry for the faulting address, if one exists;
	 * otherwise the emulated access could fault indefinitely.
	 */
653 if ((fault->error_code & PFERR_PRESENT_MASK) &&
654 !(fault->error_code & PFERR_RSVD_MASK))
655 kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
656 fault_mmu->root_hpa);
657
658 fault_mmu->inject_page_fault(vcpu, fault);
659 return fault->nested_page_fault;
660}
661EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
662
663void kvm_inject_nmi(struct kvm_vcpu *vcpu)
664{
665 atomic_inc(&vcpu->arch.nmi_queued);
666 kvm_make_request(KVM_REQ_NMI, vcpu);
667}
668EXPORT_SYMBOL_GPL(kvm_inject_nmi);
669
670void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
671{
672 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
673}
674EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
675
676void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
677{
678 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
679}
680EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
681
682
683
684
685
686bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
687{
688 if (kvm_x86_ops.get_cpl(vcpu) <= required_cpl)
689 return true;
690 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
691 return false;
692}
693EXPORT_SYMBOL_GPL(kvm_require_cpl);
694
695bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
696{
697 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
698 return true;
699
700 kvm_queue_exception(vcpu, UD_VECTOR);
701 return false;
702}
703EXPORT_SYMBOL_GPL(kvm_require_dr);
704
/*
 * Read data from guest memory through a specific MMU context: the nested gfn
 * is first translated with the given access rights, then the resulting gfn is
 * read via the normal guest-page accessor.
 */
710int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
711 gfn_t ngfn, void *data, int offset, int len,
712 u32 access)
713{
714 struct x86_exception exception;
715 gfn_t real_gfn;
716 gpa_t ngpa;
717
718 ngpa = gfn_to_gpa(ngfn);
719 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
720 if (real_gfn == UNMAPPED_GVA)
721 return -EFAULT;
722
723 real_gfn = gpa_to_gfn(real_gfn);
724
725 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
726}
727EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
728
729static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
730 void *data, int offset, int len, u32 access)
731{
732 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
733 data, offset, len, access);
734}
735
736static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
737{
738 return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
739 rsvd_bits(1, 2);
740}
741
/*
 * Load the PAE PDPTEs pointed to by CR3.  Returns 1 if they are all valid,
 * 0 otherwise.
 */
745int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
746{
747 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
748 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
749 int i;
750 int ret;
751 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
752
753 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
754 offset * sizeof(u64), sizeof(pdpte),
755 PFERR_USER_MASK|PFERR_WRITE_MASK);
756 if (ret < 0) {
757 ret = 0;
758 goto out;
759 }
760 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
761 if ((pdpte[i] & PT_PRESENT_MASK) &&
762 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
763 ret = 0;
764 goto out;
765 }
766 }
767 ret = 1;
768
769 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
770 kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
771
772out:
773
774 return ret;
775}
776EXPORT_SYMBOL_GPL(load_pdptrs);
777
778bool pdptrs_changed(struct kvm_vcpu *vcpu)
779{
780 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
781 int offset;
782 gfn_t gfn;
783 int r;
784
785 if (!is_pae_paging(vcpu))
786 return false;
787
788 if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
789 return true;
790
791 gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
792 offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
793 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
794 PFERR_USER_MASK | PFERR_WRITE_MASK);
795 if (r < 0)
796 return true;
797
798 return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
799}
800EXPORT_SYMBOL_GPL(pdptrs_changed);
801
802int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
803{
804 unsigned long old_cr0 = kvm_read_cr0(vcpu);
805 unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
806 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
807
808 cr0 |= X86_CR0_ET;
809
810#ifdef CONFIG_X86_64
811 if (cr0 & 0xffffffff00000000UL)
812 return 1;
813#endif
814
815 cr0 &= ~CR0_RESERVED_BITS;
816
817 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
818 return 1;
819
820 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
821 return 1;
822
823#ifdef CONFIG_X86_64
824 if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
825 (cr0 & X86_CR0_PG)) {
826 int cs_db, cs_l;
827
828 if (!is_pae(vcpu))
829 return 1;
830 kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
831 if (cs_l)
832 return 1;
833 }
834#endif
835 if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
836 is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
837 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
838 return 1;
839
840 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
841 return 1;
842
843 kvm_x86_ops.set_cr0(vcpu, cr0);
844
845 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
846 kvm_clear_async_pf_completion_queue(vcpu);
847 kvm_async_pf_hash_reset(vcpu);
848 }
849
850 if ((cr0 ^ old_cr0) & update_bits)
851 kvm_mmu_reset_context(vcpu);
852
853 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
854 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
855 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
856 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
857
858 return 0;
859}
860EXPORT_SYMBOL_GPL(kvm_set_cr0);
861
862void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
863{
864 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
865}
866EXPORT_SYMBOL_GPL(kvm_lmsw);
867
868void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
869{
870 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
871
872 if (vcpu->arch.xcr0 != host_xcr0)
873 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
874
875 if (vcpu->arch.xsaves_enabled &&
876 vcpu->arch.ia32_xss != host_xss)
877 wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
878 }
879
880 if (static_cpu_has(X86_FEATURE_PKU) &&
881 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
882 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
883 vcpu->arch.pkru != vcpu->arch.host_pkru)
884 __write_pkru(vcpu->arch.pkru);
885}
886EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
887
888void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
889{
890 if (static_cpu_has(X86_FEATURE_PKU) &&
891 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
892 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
893 vcpu->arch.pkru = rdpkru();
894 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
895 __write_pkru(vcpu->arch.host_pkru);
896 }
897
898 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
899
900 if (vcpu->arch.xcr0 != host_xcr0)
901 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
902
903 if (vcpu->arch.xsaves_enabled &&
904 vcpu->arch.ia32_xss != host_xss)
905 wrmsrl(MSR_IA32_XSS, host_xss);
906 }
907
908}
909EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);
910
911static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
912{
913 u64 xcr0 = xcr;
914 u64 old_xcr0 = vcpu->arch.xcr0;
915 u64 valid_bits;
916
917
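	/* Only XCR0 (XCR_XFEATURE_ENABLED_MASK) is supported. */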
918 if (index != XCR_XFEATURE_ENABLED_MASK)
919 return 1;
920 if (!(xcr0 & XFEATURE_MASK_FP))
921 return 1;
922 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
923 return 1;
924
	/*
	 * Don't allow the guest to set bits it cannot save via XSAVE.  Note
	 * that XCR0[0] (FP) is architecturally always 1, even if the guest
	 * CPUID does not advertise XSAVE.
	 */
930 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
931 if (xcr0 & ~valid_bits)
932 return 1;
933
934 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
935 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
936 return 1;
937
938 if (xcr0 & XFEATURE_MASK_AVX512) {
939 if (!(xcr0 & XFEATURE_MASK_YMM))
940 return 1;
941 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
942 return 1;
943 }
944 vcpu->arch.xcr0 = xcr0;
945
946 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
947 kvm_update_cpuid_runtime(vcpu);
948 return 0;
949}
950
951int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
952{
953 if (kvm_x86_ops.get_cpl(vcpu) != 0 ||
954 __kvm_set_xcr(vcpu, index, xcr)) {
955 kvm_inject_gp(vcpu, 0);
956 return 1;
957 }
958 return 0;
959}
960EXPORT_SYMBOL_GPL(kvm_set_xcr);
961
962int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
963{
964 if (cr4 & cr4_reserved_bits)
965 return -EINVAL;
966
967 if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
968 return -EINVAL;
969
970 return 0;
971}
972EXPORT_SYMBOL_GPL(kvm_valid_cr4);
973
974int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
975{
976 unsigned long old_cr4 = kvm_read_cr4(vcpu);
977 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
978 X86_CR4_SMEP;
979 unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
980
981 if (kvm_valid_cr4(vcpu, cr4))
982 return 1;
983
984 if (is_long_mode(vcpu)) {
985 if (!(cr4 & X86_CR4_PAE))
986 return 1;
987 if ((cr4 ^ old_cr4) & X86_CR4_LA57)
988 return 1;
989 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
990 && ((cr4 ^ old_cr4) & pdptr_bits)
991 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
992 kvm_read_cr3(vcpu)))
993 return 1;
994
995 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
996 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
997 return 1;

		/* PCID cannot be enabled when CR3[11:0] != 0 or outside long mode. */
1000 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
1001 return 1;
1002 }
1003
1004 if (kvm_x86_ops.set_cr4(vcpu, cr4))
1005 return 1;
1006
1007 if (((cr4 ^ old_cr4) & mmu_role_bits) ||
1008 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
1009 kvm_mmu_reset_context(vcpu);
1010
1011 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
1012 kvm_update_cpuid_runtime(vcpu);
1013
1014 return 0;
1015}
1016EXPORT_SYMBOL_GPL(kvm_set_cr4);
1017
1018int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1019{
1020 bool skip_tlb_flush = false;
1021#ifdef CONFIG_X86_64
1022 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1023
1024 if (pcid_enabled) {
1025 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1026 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1027 }
1028#endif
1029
1030 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
1031 if (!skip_tlb_flush) {
1032 kvm_mmu_sync_roots(vcpu);
1033 kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
1034 }
1035 return 0;
1036 }
1037
1038 if (is_long_mode(vcpu) &&
1039 (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
1040 return 1;
1041 else if (is_pae_paging(vcpu) &&
1042 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
1043 return 1;
1044
1045 kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
1046 vcpu->arch.cr3 = cr3;
1047 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
1048
1049 return 0;
1050}
1051EXPORT_SYMBOL_GPL(kvm_set_cr3);
1052
1053int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1054{
1055 if (cr8 & CR8_RESERVED_BITS)
1056 return 1;
1057 if (lapic_in_kernel(vcpu))
1058 kvm_lapic_set_tpr(vcpu, cr8);
1059 else
1060 vcpu->arch.cr8 = cr8;
1061 return 0;
1062}
1063EXPORT_SYMBOL_GPL(kvm_set_cr8);
1064
1065unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1066{
1067 if (lapic_in_kernel(vcpu))
1068 return kvm_lapic_get_cr8(vcpu);
1069 else
1070 return vcpu->arch.cr8;
1071}
1072EXPORT_SYMBOL_GPL(kvm_get_cr8);
1073
1074static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1075{
1076 int i;
1077
1078 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1079 for (i = 0; i < KVM_NR_DB_REGS; i++)
1080 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1081 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
1082 }
1083}
1084
1085void kvm_update_dr7(struct kvm_vcpu *vcpu)
1086{
1087 unsigned long dr7;
1088
1089 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1090 dr7 = vcpu->arch.guest_debug_dr7;
1091 else
1092 dr7 = vcpu->arch.dr7;
1093 kvm_x86_ops.set_dr7(vcpu, dr7);
1094 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1095 if (dr7 & DR7_BP_EN_MASK)
1096 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1097}
1098EXPORT_SYMBOL_GPL(kvm_update_dr7);
1099
1100static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1101{
1102 u64 fixed = DR6_FIXED_1;
1103
1104 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1105 fixed |= DR6_RTM;
1106 return fixed;
1107}
1108
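/* Returns 0 on success, -1 if @val is invalid for @dr (the caller injects #GP). */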
1109static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1110{
1111 size_t size = ARRAY_SIZE(vcpu->arch.db);
1112
1113 switch (dr) {
1114 case 0 ... 3:
1115 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1116 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1117 vcpu->arch.eff_db[dr] = val;
1118 break;
1119 case 4:
1120 case 6:
1121 if (!kvm_dr6_valid(val))
1122 return -1;
1123 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1124 break;
1125 case 5:
1126 default:
1127 if (!kvm_dr7_valid(val))
1128 return -1;
1129 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1130 kvm_update_dr7(vcpu);
1131 break;
1132 }
1133
1134 return 0;
1135}
1136
1137int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1138{
1139 if (__kvm_set_dr(vcpu, dr, val)) {
1140 kvm_inject_gp(vcpu, 0);
1141 return 1;
1142 }
1143 return 0;
1144}
1145EXPORT_SYMBOL_GPL(kvm_set_dr);
1146
1147int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1148{
1149 size_t size = ARRAY_SIZE(vcpu->arch.db);
1150
1151 switch (dr) {
1152 case 0 ... 3:
1153 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1154 break;
1155 case 4:
1156 case 6:
1157 *val = vcpu->arch.dr6;
1158 break;
1159 case 5:
1160 default:
1161 *val = vcpu->arch.dr7;
1162 break;
1163 }
1164 return 0;
1165}
1166EXPORT_SYMBOL_GPL(kvm_get_dr);
1167
1168bool kvm_rdpmc(struct kvm_vcpu *vcpu)
1169{
1170 u32 ecx = kvm_rcx_read(vcpu);
1171 u64 data;
1172 int err;
1173
1174 err = kvm_pmu_rdpmc(vcpu, ecx, &data);
1175 if (err)
1176 return err;
1177 kvm_rax_write(vcpu, (u32)data);
1178 kvm_rdx_write(vcpu, data >> 32);
1179 return err;
1180}
1181EXPORT_SYMBOL_GPL(kvm_rdpmc);
1182
/*
 * MSR lists exposed to userspace via KVM_GET_MSR_INDEX_LIST and the
 * KVM_GET/SET_MSRS ioctls.
 *
 * msrs_to_save is built from msrs_to_save_all by filtering on the
 * capabilities of the host CPU; emulated_msrs (further below) holds
 * KVM-emulated MSRs whose availability depends on KVM and host
 * virtualization features rather than on host CPU features.
 */
1195static const u32 msrs_to_save_all[] = {
1196 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1197 MSR_STAR,
1198#ifdef CONFIG_X86_64
1199 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1200#endif
1201 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1202 MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1203 MSR_IA32_SPEC_CTRL,
1204 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1205 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1206 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1207 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1208 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1209 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1210 MSR_IA32_UMWAIT_CONTROL,
1211
1212 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1213 MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
1214 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1215 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1216 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1217 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1218 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1219 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1220 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1221 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1222 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1223 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1224 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1225 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1226 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1227 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1228 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1229 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1230 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1231 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1232 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1233 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1234};
1235
1236static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1237static unsigned num_msrs_to_save;
1238
1239static const u32 emulated_msrs_all[] = {
1240 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1241 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1242 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1243 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1244 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1245 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1246 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1247 HV_X64_MSR_RESET,
1248 HV_X64_MSR_VP_INDEX,
1249 HV_X64_MSR_VP_RUNTIME,
1250 HV_X64_MSR_SCONTROL,
1251 HV_X64_MSR_STIMER0_CONFIG,
1252 HV_X64_MSR_VP_ASSIST_PAGE,
1253 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1254 HV_X64_MSR_TSC_EMULATION_STATUS,
1255 HV_X64_MSR_SYNDBG_OPTIONS,
1256 HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS,
1257 HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER,
1258 HV_X64_MSR_SYNDBG_PENDING_BUFFER,
1259
1260 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1261 MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
1262
1263 MSR_IA32_TSC_ADJUST,
1264 MSR_IA32_TSCDEADLINE,
1265 MSR_IA32_ARCH_CAPABILITIES,
1266 MSR_IA32_PERF_CAPABILITIES,
1267 MSR_IA32_MISC_ENABLE,
1268 MSR_IA32_MCG_STATUS,
1269 MSR_IA32_MCG_CTL,
1270 MSR_IA32_MCG_EXT_CTL,
1271 MSR_IA32_SMBASE,
1272 MSR_SMI_COUNT,
1273 MSR_PLATFORM_INFO,
1274 MSR_MISC_FEATURES_ENABLES,
1275 MSR_AMD64_VIRT_SPEC_CTRL,
1276 MSR_IA32_POWER_CTL,
1277 MSR_IA32_UCODE_REV,
1278
1279
1280
1281
1282
1283
1284
1285
1286 MSR_IA32_VMX_BASIC,
1287 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1288 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1289 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1290 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1291 MSR_IA32_VMX_MISC,
1292 MSR_IA32_VMX_CR0_FIXED0,
1293 MSR_IA32_VMX_CR4_FIXED0,
1294 MSR_IA32_VMX_VMCS_ENUM,
1295 MSR_IA32_VMX_PROCBASED_CTLS2,
1296 MSR_IA32_VMX_EPT_VPID_CAP,
1297 MSR_IA32_VMX_VMFUNC,
1298
1299 MSR_K7_HWCR,
1300 MSR_KVM_POLL_CONTROL,
1301};
1302
1303static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1304static unsigned num_emulated_msrs;
1305
/*
 * List of MSR numbers used to expose MSR-based CPU features, i.e. values a
 * hypervisor stack can read and validate before configuring a guest.
 */
1310static const u32 msr_based_features_all[] = {
1311 MSR_IA32_VMX_BASIC,
1312 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1313 MSR_IA32_VMX_PINBASED_CTLS,
1314 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1315 MSR_IA32_VMX_PROCBASED_CTLS,
1316 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1317 MSR_IA32_VMX_EXIT_CTLS,
1318 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1319 MSR_IA32_VMX_ENTRY_CTLS,
1320 MSR_IA32_VMX_MISC,
1321 MSR_IA32_VMX_CR0_FIXED0,
1322 MSR_IA32_VMX_CR0_FIXED1,
1323 MSR_IA32_VMX_CR4_FIXED0,
1324 MSR_IA32_VMX_CR4_FIXED1,
1325 MSR_IA32_VMX_VMCS_ENUM,
1326 MSR_IA32_VMX_PROCBASED_CTLS2,
1327 MSR_IA32_VMX_EPT_VPID_CAP,
1328 MSR_IA32_VMX_VMFUNC,
1329
1330 MSR_F10H_DECFG,
1331 MSR_IA32_UCODE_REV,
1332 MSR_IA32_ARCH_CAPABILITIES,
1333 MSR_IA32_PERF_CAPABILITIES,
1334};
1335
1336static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1337static unsigned int num_msr_based_features;
1338
1339static u64 kvm_get_arch_capabilities(void)
1340{
1341 u64 data = 0;
1342
1343 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1344 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1345
	/*
	 * PSCHANGE_MC_NO can always be advertised: if nx_huge_pages is
	 * enabled, KVM's shadow paging protects a nested hypervisor, and if
	 * it is not, L1 is vulnerable to ITLB multihit from its own guests
	 * regardless of what is reported here.
	 */
1352 data |= ARCH_CAP_PSCHANGE_MC_NO;
1353
	/*
	 * If the host already performs L1D cache flushes around VM-entry
	 * (any mode other than "never"), a nested hypervisor does not need
	 * to flush again, so advertise SKIP_VMENTRY_L1DFLUSH.
	 */
1363 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1364 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1365
1366 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1367 data |= ARCH_CAP_RDCL_NO;
1368 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1369 data |= ARCH_CAP_SSB_NO;
1370 if (!boot_cpu_has_bug(X86_BUG_MDS))
1371 data |= ARCH_CAP_MDS_NO;
1372
	/*
	 * If the host has TSX/RTM disabled, neither TAA_NO nor TSX_CTRL is
	 * relevant for the guest, so clear both; otherwise report TAA_NO
	 * only when the host itself is not affected by TAA.
	 */
1379 if (!boot_cpu_has(X86_FEATURE_RTM))
1380 data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
1381 else if (!boot_cpu_has_bug(X86_BUG_TAA))
1382 data |= ARCH_CAP_TAA_NO;
1383
1384 return data;
1385}
1386
1387static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1388{
1389 switch (msr->index) {
1390 case MSR_IA32_ARCH_CAPABILITIES:
1391 msr->data = kvm_get_arch_capabilities();
1392 break;
1393 case MSR_IA32_UCODE_REV:
1394 rdmsrl_safe(msr->index, &msr->data);
1395 break;
1396 default:
1397 return kvm_x86_ops.get_msr_feature(msr);
1398 }
1399 return 0;
1400}
1401
1402static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1403{
1404 struct kvm_msr_entry msr;
1405 int r;
1406
1407 msr.index = index;
1408 r = kvm_get_msr_feature(&msr);
1409
1410 if (r == KVM_MSR_RET_INVALID) {
1411
1412 *data = 0;
1413 r = kvm_msr_ignored_check(vcpu, index, 0, false);
1414 }
1415
1416 if (r)
1417 return r;
1418
1419 *data = msr.data;
1420
1421 return 0;
1422}
1423
1424static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1425{
1426 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1427 return false;
1428
1429 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1430 return false;
1431
1432 if (efer & (EFER_LME | EFER_LMA) &&
1433 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1434 return false;
1435
1436 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1437 return false;
1438
1439 return true;
1440
1441}
1442bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1443{
1444 if (efer & efer_reserved_bits)
1445 return false;
1446
1447 return __kvm_valid_efer(vcpu, efer);
1448}
1449EXPORT_SYMBOL_GPL(kvm_valid_efer);
1450
1451static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1452{
1453 u64 old_efer = vcpu->arch.efer;
1454 u64 efer = msr_info->data;
1455
1456 if (efer & efer_reserved_bits)
1457 return 1;
1458
1459 if (!msr_info->host_initiated) {
1460 if (!__kvm_valid_efer(vcpu, efer))
1461 return 1;
1462
1463 if (is_paging(vcpu) &&
1464 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1465 return 1;
1466 }
1467
1468 efer &= ~EFER_LMA;
1469 efer |= vcpu->arch.efer & EFER_LMA;
1470
1471 kvm_x86_ops.set_efer(vcpu, efer);
1472
1473
1474 if ((efer ^ old_efer) & EFER_NX)
1475 kvm_mmu_reset_context(vcpu);
1476
1477 return 0;
1478}
1479
1480void kvm_enable_efer_bits(u64 mask)
1481{
1482 efer_reserved_bits &= ~mask;
1483}
1484EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1485
/*
 * Write @data into the MSR specified by @index.  Select MSR-specific fault
 * checks are bypassed if @host_initiated is %true.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
1492static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1493 bool host_initiated)
1494{
1495 struct msr_data msr;
1496
1497 switch (index) {
1498 case MSR_FS_BASE:
1499 case MSR_GS_BASE:
1500 case MSR_KERNEL_GS_BASE:
1501 case MSR_CSTAR:
1502 case MSR_LSTAR:
1503 if (is_noncanonical_address(data, vcpu))
1504 return 1;
1505 break;
1506 case MSR_IA32_SYSENTER_EIP:
1507 case MSR_IA32_SYSENTER_ESP:
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
1521 }
1522
1523 msr.data = data;
1524 msr.index = index;
1525 msr.host_initiated = host_initiated;
1526
1527 return kvm_x86_ops.set_msr(vcpu, &msr);
1528}
1529
1530static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
1531 u32 index, u64 data, bool host_initiated)
1532{
1533 int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
1534
1535 if (ret == KVM_MSR_RET_INVALID)
1536 ret = kvm_msr_ignored_check(vcpu, index, data, true);
1537
1538 return ret;
1539}
1540
/*
 * Read the MSR specified by @index into @data.  Select MSR-specific fault
 * checks are bypassed if @host_initiated is %true.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
1547int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1548 bool host_initiated)
1549{
1550 struct msr_data msr;
1551 int ret;
1552
1553 msr.index = index;
1554 msr.host_initiated = host_initiated;
1555
1556 ret = kvm_x86_ops.get_msr(vcpu, &msr);
1557 if (!ret)
1558 *data = msr.data;
1559 return ret;
1560}
1561
1562static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
1563 u32 index, u64 *data, bool host_initiated)
1564{
1565 int ret = __kvm_get_msr(vcpu, index, data, host_initiated);
1566
1567 if (ret == KVM_MSR_RET_INVALID) {
1568
1569 *data = 0;
1570 ret = kvm_msr_ignored_check(vcpu, index, 0, false);
1571 }
1572
1573 return ret;
1574}
1575
1576int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1577{
1578 return kvm_get_msr_ignored_check(vcpu, index, data, false);
1579}
1580EXPORT_SYMBOL_GPL(kvm_get_msr);
1581
1582int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1583{
1584 return kvm_set_msr_ignored_check(vcpu, index, data, false);
1585}
1586EXPORT_SYMBOL_GPL(kvm_set_msr);
1587
1588int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1589{
1590 u32 ecx = kvm_rcx_read(vcpu);
1591 u64 data;
1592
1593 if (kvm_get_msr(vcpu, ecx, &data)) {
1594 trace_kvm_msr_read_ex(ecx);
1595 kvm_inject_gp(vcpu, 0);
1596 return 1;
1597 }
1598
1599 trace_kvm_msr_read(ecx, data);
1600
1601 kvm_rax_write(vcpu, data & -1u);
1602 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1603 return kvm_skip_emulated_instruction(vcpu);
1604}
1605EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1606
1607int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1608{
1609 u32 ecx = kvm_rcx_read(vcpu);
1610 u64 data = kvm_read_edx_eax(vcpu);
1611
1612 if (kvm_set_msr(vcpu, ecx, data)) {
1613 trace_kvm_msr_write_ex(ecx, data);
1614 kvm_inject_gp(vcpu, 0);
1615 return 1;
1616 }
1617
1618 trace_kvm_msr_write(ecx, data);
1619 return kvm_skip_emulated_instruction(vcpu);
1620}
1621EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1622
1623bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
1624{
1625 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
1626 xfer_to_guest_mode_work_pending();
1627}
1628EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
1629
/*
 * Fast path for performance-sensitive WRMSR emulation, i.e. sending an IPI
 * via the x2APIC ICR.  Handling the write early, with interrupts still
 * disabled, avoids the expensive parts of a full exit such as reacquiring
 * KVM's SRCU lock.
 */
1637static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data)
1638{
1639 if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(vcpu->arch.apic))
1640 return 1;
1641
1642 if (((data & APIC_SHORT_MASK) == APIC_DEST_NOSHORT) &&
1643 ((data & APIC_DEST_MASK) == APIC_DEST_PHYSICAL) &&
1644 ((data & APIC_MODE_MASK) == APIC_DM_FIXED) &&
1645 ((u32)(data >> 32) != X2APIC_BROADCAST)) {
1646
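		/* Bit 12 is the ICR busy flag; clear it before sending and recording the IPI. */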
1647 data &= ~(1 << 12);
1648 kvm_apic_send_ipi(vcpu->arch.apic, (u32)data, (u32)(data >> 32));
1649 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR2, (u32)(data >> 32));
1650 kvm_lapic_set_reg(vcpu->arch.apic, APIC_ICR, (u32)data);
1651 trace_kvm_apic_write(APIC_ICR, (u32)data);
1652 return 0;
1653 }
1654
1655 return 1;
1656}
1657
1658static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data)
1659{
1660 if (!kvm_can_use_hv_timer(vcpu))
1661 return 1;
1662
1663 kvm_set_lapic_tscdeadline_msr(vcpu, data);
1664 return 0;
1665}
1666
1667fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
1668{
1669 u32 msr = kvm_rcx_read(vcpu);
1670 u64 data;
1671 fastpath_t ret = EXIT_FASTPATH_NONE;
1672
1673 switch (msr) {
1674 case APIC_BASE_MSR + (APIC_ICR >> 4):
1675 data = kvm_read_edx_eax(vcpu);
1676 if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) {
1677 kvm_skip_emulated_instruction(vcpu);
1678 ret = EXIT_FASTPATH_EXIT_HANDLED;
1679 }
1680 break;
1681 case MSR_IA32_TSCDEADLINE:
1682 data = kvm_read_edx_eax(vcpu);
1683 if (!handle_fastpath_set_tscdeadline(vcpu, data)) {
1684 kvm_skip_emulated_instruction(vcpu);
1685 ret = EXIT_FASTPATH_REENTER_GUEST;
1686 }
1687 break;
1688 default:
1689 break;
1690 }
1691
1692 if (ret != EXIT_FASTPATH_NONE)
1693 trace_kvm_msr_write(msr, data);
1694
1695 return ret;
1696}
1697EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
1698
/*
 * Adapt set_msr() and get_msr() to msr_io()'s calling convention.
 */
1702static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1703{
1704 return kvm_get_msr_ignored_check(vcpu, index, data, true);
1705}
1706
1707static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1708{
1709 return kvm_set_msr_ignored_check(vcpu, index, *data, true);
1710}
1711
1712#ifdef CONFIG_X86_64
1713struct pvclock_clock {
1714 int vclock_mode;
1715 u64 cycle_last;
1716 u64 mask;
1717 u32 mult;
1718 u32 shift;
1719 u64 base_cycles;
1720 u64 offset;
1721};
1722
1723struct pvclock_gtod_data {
1724 seqcount_t seq;
1725
1726 struct pvclock_clock clock;
1727 struct pvclock_clock raw_clock;
1728
1729 ktime_t offs_boot;
1730 u64 wall_time_sec;
1731};
1732
1733static struct pvclock_gtod_data pvclock_gtod_data;
1734
1735static void update_pvclock_gtod(struct timekeeper *tk)
1736{
1737 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1738
1739 write_seqcount_begin(&vdata->seq);
1740
1741
1742 vdata->clock.vclock_mode = tk->tkr_mono.clock->vdso_clock_mode;
1743 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1744 vdata->clock.mask = tk->tkr_mono.mask;
1745 vdata->clock.mult = tk->tkr_mono.mult;
1746 vdata->clock.shift = tk->tkr_mono.shift;
1747 vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec;
1748 vdata->clock.offset = tk->tkr_mono.base;
1749
1750 vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->vdso_clock_mode;
1751 vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last;
1752 vdata->raw_clock.mask = tk->tkr_raw.mask;
1753 vdata->raw_clock.mult = tk->tkr_raw.mult;
1754 vdata->raw_clock.shift = tk->tkr_raw.shift;
1755 vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec;
1756 vdata->raw_clock.offset = tk->tkr_raw.base;
1757
1758 vdata->wall_time_sec = tk->xtime_sec;
1759
1760 vdata->offs_boot = tk->offs_boot;
1761
1762 write_seqcount_end(&vdata->seq);
1763}
1764
1765static s64 get_kvmclock_base_ns(void)
1766{
1767
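	/* Boot-based time: CLOCK_MONOTONIC_RAW plus the recorded boot-time offset. */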
1768 return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot));
1769}
1770#else
1771static s64 get_kvmclock_base_ns(void)
1772{
1773
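	/* No master clock on 32-bit hosts; CLOCK_BOOTTIME is sufficient. */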
1774 return ktime_get_boottime_ns();
1775}
1776#endif
1777
1778void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
1779{
1780 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1781 kvm_vcpu_kick(vcpu);
1782}
1783
1784static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1785{
1786 int version;
1787 int r;
1788 struct pvclock_wall_clock wc;
1789 u64 wall_nsec;
1790
1791 if (!wall_clock)
1792 return;
1793
1794 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
1795 if (r)
1796 return;
1797
1798 if (version & 1)
1799 ++version;
1800
1801 ++version;
1802
1803 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
1804 return;
1805
1806
1807
1808
1809
1810
1811 wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
1812
1813 wc.nsec = do_div(wall_nsec, 1000000000);
1814 wc.sec = (u32)wall_nsec;
1815 wc.version = version;
1816
1817 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1818
1819 version++;
1820 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1821}
1822
1823static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1824{
1825 do_shl32_div32(dividend, divisor);
1826 return dividend;
1827}
1828
1829static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1830 s8 *pshift, u32 *pmultiplier)
1831{
1832 uint64_t scaled64;
1833 int32_t shift = 0;
1834 uint64_t tps64;
1835 uint32_t tps32;
1836
1837 tps64 = base_hz;
1838 scaled64 = scaled_hz;
1839 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1840 tps64 >>= 1;
1841 shift--;
1842 }
1843
1844 tps32 = (uint32_t)tps64;
1845 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
1846 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
1847 scaled64 >>= 1;
1848 else
1849 tps32 <<= 1;
1850 shift++;
1851 }
1852
1853 *pshift = shift;
1854 *pmultiplier = div_frac(scaled64, tps32);
1855}
1856
1857#ifdef CONFIG_X86_64
1858static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1859#endif
1860
1861static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1862static unsigned long max_tsc_khz;
1863
1864static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1865{
1866 u64 v = (u64)khz * (1000000 + ppm);
1867 do_div(v, 1000000);
1868 return v;
1869}
1870
1871static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1872{
1873 u64 ratio;
1874
1875
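	/* Guest TSC runs at the same frequency as the host TSC? */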
1876 if (!scale) {
1877 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1878 return 0;
1879 }
1880
1881
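	/* Is hardware TSC scaling available? */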
1882 if (!kvm_has_tsc_control) {
1883 if (user_tsc_khz > tsc_khz) {
1884 vcpu->arch.tsc_catchup = 1;
1885 vcpu->arch.tsc_always_catchup = 1;
1886 return 0;
1887 } else {
1888 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
1889 return -1;
1890 }
1891 }
1892
1893
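	/* TSC scaling required - calculate the ratio. */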
1894 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
1895 user_tsc_khz, tsc_khz);
1896
1897 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
1898 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
1899 user_tsc_khz);
1900 return -1;
1901 }
1902
1903 vcpu->arch.tsc_scaling_ratio = ratio;
1904 return 0;
1905}
1906
1907static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1908{
1909 u32 thresh_lo, thresh_hi;
1910 int use_scaling = 0;
1911
1912
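	/* tsc_khz can be zero if TSC calibration fails. */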
1913 if (user_tsc_khz == 0) {
1914
1915 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1916 return -1;
1917 }
1918
1919
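	/* Compute a scale to convert nanoseconds to guest TSC cycles. */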
1920 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
1921 &vcpu->arch.virtual_tsc_shift,
1922 &vcpu->arch.virtual_tsc_mult);
1923 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
1924
	/*
	 * Only resort to scaling (or catchup) if the requested rate differs
	 * from the host rate by more than the allowed tolerance; rates
	 * within tsc_tolerance_ppm simply run at the host frequency.
	 */
1931 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1932 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1933 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
1934 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
1935 use_scaling = 1;
1936 }
1937 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
1938}
1939
1940static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1941{
1942 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1943 vcpu->arch.virtual_tsc_mult,
1944 vcpu->arch.virtual_tsc_shift);
1945 tsc += vcpu->arch.this_tsc_write;
1946 return tsc;
1947}
1948
1949static inline int gtod_is_based_on_tsc(int mode)
1950{
1951 return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
1952}
1953
1954static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1955{
1956#ifdef CONFIG_X86_64
1957 bool vcpus_matched;
1958 struct kvm_arch *ka = &vcpu->kvm->arch;
1959 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1960
1961 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1962 atomic_read(&vcpu->kvm->online_vcpus));
1963
	/*
	 * Request a masterclock update if the masterclock is already in use,
	 * or if it can now be enabled: enabling requires the host clocksource
	 * to be TSC based and all vCPUs to have matched TSCs.
	 */
1972 if (ka->use_master_clock ||
1973 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
1974 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1975
1976 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1977 atomic_read(&vcpu->kvm->online_vcpus),
1978 ka->use_master_clock, gtod->clock.vclock_mode);
1979#endif
1980}
1981
1982static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1983{
1984 u64 curr_offset = vcpu->arch.l1_tsc_offset;
1985 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1986}
1987
/*
 * Scale a TSC value by a fixed-point ratio: the low
 * kvm_tsc_scaling_ratio_frac_bits bits of @ratio are the fractional part,
 * the remaining high bits the integer part, so the result is
 * (tsc * ratio) >> kvm_tsc_scaling_ratio_frac_bits.
 */
1998static inline u64 __scale_tsc(u64 ratio, u64 tsc)
1999{
2000 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
2001}
2002
2003u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
2004{
2005 u64 _tsc = tsc;
2006 u64 ratio = vcpu->arch.tsc_scaling_ratio;
2007
2008 if (ratio != kvm_default_tsc_scaling_ratio)
2009 _tsc = __scale_tsc(ratio, tsc);
2010
2011 return _tsc;
2012}
2013EXPORT_SYMBOL_GPL(kvm_scale_tsc);
2014
2015static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
2016{
2017 u64 tsc;
2018
2019 tsc = kvm_scale_tsc(vcpu, rdtsc());
2020
2021 return target_tsc - tsc;
2022}
2023
2024u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
2025{
2026 return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
2027}
2028EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
2029
2030static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
2031{
2032 vcpu->arch.l1_tsc_offset = offset;
2033 vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
2034}
2035
2036static inline bool kvm_check_tsc_unstable(void)
2037{
2038#ifdef CONFIG_X86_64
	/*
	 * The TSC is reported as unstable when running on Hyper-V, but the
	 * Hyper-V "TSC page" clocksource is still usable, so don't treat
	 * that case as unstable here.
	 */
2043 if (pvclock_gtod_data.clock.vclock_mode == VDSO_CLOCKMODE_HVCLOCK)
2044 return false;
2045#endif
2046 return check_tsc_unstable();
2047}
2048
2049void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
2050{
2051 struct kvm *kvm = vcpu->kvm;
2052 u64 offset, ns, elapsed;
2053 unsigned long flags;
2054 bool matched;
2055 bool already_matched;
2056 u64 data = msr->data;
2057 bool synchronizing = false;
2058
2059 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
2060 offset = kvm_compute_tsc_offset(vcpu, data);
2061 ns = get_kvmclock_base_ns();
2062 elapsed = ns - kvm->arch.last_tsc_nsec;
2063
2064 if (vcpu->arch.virtual_tsc_khz) {
2065 if (data == 0 && msr->host_initiated) {
			/*
			 * Detection of vcpu initialization: a host-initiated
			 * write of 0 is treated as a request to synchronize
			 * with any previously written TSC.
			 */
2071 synchronizing = true;
2072 } else {
2073 u64 tsc_exp = kvm->arch.last_tsc_write +
2074 nsec_to_cycles(vcpu, elapsed);
2075 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
2076
			/*
			 * A TSC write within roughly one second of virtual
			 * cycle time of the previously recorded value is
			 * interpreted as an attempt to synchronize this vCPU
			 * with the others.
			 */
2081 synchronizing = data < tsc_exp + tsc_hz &&
2082 data + tsc_hz > tsc_exp;
2083 }
2084 }
2085
	/*
	 * With a reliable host TSC the offsets of synchronizing vCPUs can be
	 * matched exactly; with an unstable TSC the elapsed time is added in
	 * instead, so the guest TSCs stay as close as software can keep them.
	 */
2092 if (synchronizing &&
2093 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
2094 if (!kvm_check_tsc_unstable()) {
2095 offset = kvm->arch.cur_tsc_offset;
2096 } else {
2097 u64 delta = nsec_to_cycles(vcpu, elapsed);
2098 data += delta;
2099 offset = kvm_compute_tsc_offset(vcpu, data);
2100 }
2101 matched = true;
2102 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
2103 } else {
		/*
		 * Start a new "generation" of TSC writes.  For each
		 * generation the originally measured nanosecond time, TSC
		 * write value and offset are recorded in kvm->arch.cur_*,
		 * so later matching writes can reuse the exact offset (or
		 * the exact software computation in compute_guest_tsc()).
		 */
2113 kvm->arch.cur_tsc_generation++;
2114 kvm->arch.cur_tsc_nsec = ns;
2115 kvm->arch.cur_tsc_write = data;
2116 kvm->arch.cur_tsc_offset = offset;
2117 matched = false;
2118 }
2119
	/*
	 * Also track the most recent write time, value and frequency so the
	 * matching interval can be extended at each write.
	 */
2124 kvm->arch.last_tsc_nsec = ns;
2125 kvm->arch.last_tsc_write = data;
2126 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2127
2128 vcpu->arch.last_guest_tsc = data;
2129
2130
2131 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2132 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2133 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2134
2135 if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
2136 update_ia32_tsc_adjust_msr(vcpu, offset);
2137
2138 kvm_vcpu_write_tsc_offset(vcpu, offset);
2139 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2140
2141 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
2142 if (!matched) {
2143 kvm->arch.nr_vcpus_matched_tsc = 0;
2144 } else if (!already_matched) {
2145 kvm->arch.nr_vcpus_matched_tsc++;
2146 }
2147
2148 kvm_track_tsc_matching(vcpu);
2149 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
2150}
2151
2152EXPORT_SYMBOL_GPL(kvm_write_tsc);
2153
2154static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2155 s64 adjustment)
2156{
2157 u64 tsc_offset = vcpu->arch.l1_tsc_offset;
2158 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2159}
2160
2161static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2162{
2163 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2164 WARN_ON(adjustment < 0);
2165 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
2166 adjust_tsc_offset_guest(vcpu, adjustment);
2167}
2168
2169#ifdef CONFIG_X86_64
2170
2171static u64 read_tsc(void)
2172{
2173 u64 ret = (u64)rdtsc_ordered();
2174 u64 last = pvclock_gtod_data.clock.cycle_last;
2175
2176 if (likely(ret >= last))
2177 return ret;
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187 asm volatile ("");
2188 return last;
2189}
2190
2191static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
2192 int *mode)
2193{
2194 long v;
2195 u64 tsc_pg_val;
2196
2197 switch (clock->vclock_mode) {
2198 case VDSO_CLOCKMODE_HVCLOCK:
2199 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2200 tsc_timestamp);
2201 if (tsc_pg_val != U64_MAX) {
2202
2203 *mode = VDSO_CLOCKMODE_HVCLOCK;
2204 v = (tsc_pg_val - clock->cycle_last) &
2205 clock->mask;
2206 } else {
2207
2208 *mode = VDSO_CLOCKMODE_NONE;
2209 }
2210 break;
2211 case VDSO_CLOCKMODE_TSC:
2212 *mode = VDSO_CLOCKMODE_TSC;
2213 *tsc_timestamp = read_tsc();
2214 v = (*tsc_timestamp - clock->cycle_last) &
2215 clock->mask;
2216 break;
2217 default:
2218 *mode = VDSO_CLOCKMODE_NONE;
2219 }
2220
2221 if (*mode == VDSO_CLOCKMODE_NONE)
2222 *tsc_timestamp = v = 0;
2223
2224 return v * clock->mult;
2225}
2226
2227static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
2228{
2229 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2230 unsigned long seq;
2231 int mode;
2232 u64 ns;
2233
2234 do {
2235 seq = read_seqcount_begin(&gtod->seq);
2236 ns = gtod->raw_clock.base_cycles;
2237 ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
2238 ns >>= gtod->raw_clock.shift;
2239 ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot));
2240 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2241 *t = ns;
2242
2243 return mode;
2244}
2245
2246static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2247{
2248 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2249 unsigned long seq;
2250 int mode;
2251 u64 ns;
2252
2253 do {
2254 seq = read_seqcount_begin(&gtod->seq);
2255 ts->tv_sec = gtod->wall_time_sec;
2256 ns = gtod->clock.base_cycles;
2257 ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
2258 ns >>= gtod->clock.shift;
2259 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2260
2261 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2262 ts->tv_nsec = ns;
2263
2264 return mode;
2265}
2266
2267
2268static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2269{
2270
2271 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2272 return false;
2273
2274 return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
2275 tsc_timestamp));
2276}
2277
2278
2279static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2280 u64 *tsc_timestamp)
2281{
2282
2283 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2284 return false;
2285
2286 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2287}
2288#endif
2289
2290/*
2291 * Assuming a stable TSC across physical CPUs, and a stable TSC
2292 * across virtual CPUs, the following condition is possible.
2293 * Each numbered line represents an event visible to both
2294 * CPUs at the next numbered event.
2295 *
2296 * "timespecX" represents host monotonic time. "tscX" represents
2297 * RDTSC value.
2298 *
2299 *         VCPU0 on CPU0                  |   VCPU1 on CPU1
2300 *
2301 * 1.  read timespec0,tsc0
2302 * 2.                                     | timespec1 = timespec0 + N
2303 *                                        | tsc1 = tsc0 + M
2304 * 3. transition to guest                 | transition to guest
2305 * 4. ret0 = timespec0 + (rdtsc - tsc0)   |
2306 * 5.                                     | ret1 = timespec1 + (rdtsc - tsc1)
2307 *                                        | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
2308 *
2309 * Since the ret0 update is visible to VCPU1 at time 5, monotonicity requires:
2310 *
2311 *     - ret0 < ret1
2312 *     - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
2313 *     - 0 < N - M  =>  M < N
2314 *
2315 * That is, whenever timespec0 != timespec1, M < N, unconditionally.
2316 *
2317 * These rules guarantee that the time-of-day the guest computes from
2318 * kvmclock never goes backwards across vCPUs.
2319 *
2320 * pvclock_update_vm_gtod_copy() below therefore enables the master
2321 * clock only if the host clocksource is TSC based and all vCPU TSCs
2322 * are matched, and copies the host snapshot into the "master
2323 * timeline" (master_kernel_ns / master_cycle_now) consumed by
2324 * kvm_guest_time_update().
2325 */
2326
2327
2328
2329
2330
2331static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2332{
2333#ifdef CONFIG_X86_64
2334 struct kvm_arch *ka = &kvm->arch;
2335 int vclock_mode;
2336 bool host_tsc_clocksource, vcpus_matched;
2337
2338 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2339 atomic_read(&kvm->online_vcpus));
2340
2341 /*
2342 * If the host uses TSC clock, then passthrough TSC as stable
2343 * to the guest.
2344 */
2345 host_tsc_clocksource = kvm_get_time_and_clockread(
2346 &ka->master_kernel_ns,
2347 &ka->master_cycle_now);
2348
2349 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2350 && !ka->backwards_tsc_observed
2351 && !ka->boot_vcpu_runs_old_kvmclock;
2352
2353 if (ka->use_master_clock)
2354 atomic_set(&kvm_guest_has_master_clock, 1);
2355
2356 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2357 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2358 vcpus_matched);
2359#endif
2360}
2361
2362void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2363{
2364 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2365}
2366
2367static void kvm_gen_update_masterclock(struct kvm *kvm)
2368{
2369#ifdef CONFIG_X86_64
2370 int i;
2371 struct kvm_vcpu *vcpu;
2372 struct kvm_arch *ka = &kvm->arch;
2373
2374 spin_lock(&ka->pvclock_gtod_sync_lock);
2375 kvm_make_mclock_inprogress_request(kvm);
2376
2377 pvclock_update_vm_gtod_copy(kvm);
2378
2379 kvm_for_each_vcpu(i, vcpu, kvm)
2380 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2381
2382
2383 kvm_for_each_vcpu(i, vcpu, kvm)
2384 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2385
2386 spin_unlock(&ka->pvclock_gtod_sync_lock);
2387#endif
2388}
2389
2390u64 get_kvmclock_ns(struct kvm *kvm)
2391{
2392 struct kvm_arch *ka = &kvm->arch;
2393 struct pvclock_vcpu_time_info hv_clock;
2394 u64 ret;
2395
2396 spin_lock(&ka->pvclock_gtod_sync_lock);
2397 if (!ka->use_master_clock) {
2398 spin_unlock(&ka->pvclock_gtod_sync_lock);
2399 return get_kvmclock_base_ns() + ka->kvmclock_offset;
2400 }
2401
2402 hv_clock.tsc_timestamp = ka->master_cycle_now;
2403 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2404 spin_unlock(&ka->pvclock_gtod_sync_lock);
2405
2406
2407 get_cpu();
2408
2409 if (__this_cpu_read(cpu_tsc_khz)) {
2410 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2411 &hv_clock.tsc_shift,
2412 &hv_clock.tsc_to_system_mul);
2413 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2414 } else
2415 ret = get_kvmclock_base_ns() + ka->kvmclock_offset;
2416
2417 put_cpu();
2418
2419 return ret;
2420}
2421
2422static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
2423{
2424 struct kvm_vcpu_arch *vcpu = &v->arch;
2425 struct pvclock_vcpu_time_info guest_hv_clock;
2426
2427 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
2428 &guest_hv_clock, sizeof(guest_hv_clock))))
2429 return;
2430
2431 /*
2432 * This vCPU is paused, but it is legal for a guest to read another
2433 * vCPU's kvmclock, so we really have to follow the specification
2434 * where it says that version is odd while the data is being
2435 * modified, and even after it is consistent.
2436 *
2437 * The update below therefore:
2438 *  1. makes the version odd (forcing guest readers to retry),
2439 *  2. writes the new clock data,
2440 *  3. makes the version even again,
2441 * with write barriers (smp_wmb()) between the steps so the guest
2442 * never observes partially updated data under an even version.
2443 * The BUILD_BUG_ON() relies on version being the first field.
2444 */
2445 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2446
2447 if (guest_hv_clock.version & 1)
2448 ++guest_hv_clock.version;
2449
2450 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2451 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2452 &vcpu->hv_clock,
2453 sizeof(vcpu->hv_clock.version));
2454
2455 smp_wmb();
2456
2457
2458 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2459
2460 if (vcpu->pvclock_set_guest_stopped_request) {
2461 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2462 vcpu->pvclock_set_guest_stopped_request = false;
2463 }
2464
2465 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2466
2467 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2468 &vcpu->hv_clock,
2469 sizeof(vcpu->hv_clock));
2470
2471 smp_wmb();
2472
2473 vcpu->hv_clock.version++;
2474 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2475 &vcpu->hv_clock,
2476 sizeof(vcpu->hv_clock.version));
2477}
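/*
 * For reference, the consumer side of the structure written above: a
 * guest reads its pvclock with a retry loop keyed on the version field
 * (minimal sketch of the pvclock ABI, not code from this file; 'pvti'
 * is the guest's mapping of the shared page):
 *
 *	do {
 *		version = pvti->version;
 *		smp_rmb();
 *		delta = rdtsc() - pvti->tsc_timestamp;
 *		if (pvti->tsc_shift >= 0)
 *			delta <<= pvti->tsc_shift;
 *		else
 *			delta >>= -pvti->tsc_shift;
 *		ns = pvti->system_time +
 *		     mul_u64_u32_shr(delta, pvti->tsc_to_system_mul, 32);
 *		smp_rmb();
 *	} while ((version & 1) || version != pvti->version);
 *
 * The odd/even version protocol above is what makes this loop safe.
 */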
2478
2479static int kvm_guest_time_update(struct kvm_vcpu *v)
2480{
2481 unsigned long flags, tgt_tsc_khz;
2482 struct kvm_vcpu_arch *vcpu = &v->arch;
2483 struct kvm_arch *ka = &v->kvm->arch;
2484 s64 kernel_ns;
2485 u64 tsc_timestamp, host_tsc;
2486 u8 pvclock_flags;
2487 bool use_master_clock;
2488
2489 kernel_ns = 0;
2490 host_tsc = 0;
2491
2492 /*
2493 * If the host uses TSC clock, then passthrough TSC as stable
2494 * to the guest.
2495 */
2496 spin_lock(&ka->pvclock_gtod_sync_lock);
2497 use_master_clock = ka->use_master_clock;
2498 if (use_master_clock) {
2499 host_tsc = ka->master_cycle_now;
2500 kernel_ns = ka->master_kernel_ns;
2501 }
2502 spin_unlock(&ka->pvclock_gtod_sync_lock);
2503
2504
2505 local_irq_save(flags);
2506 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2507 if (unlikely(tgt_tsc_khz == 0)) {
2508 local_irq_restore(flags);
2509 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2510 return 1;
2511 }
2512 if (!use_master_clock) {
2513 host_tsc = rdtsc();
2514 kernel_ns = get_kvmclock_base_ns();
2515 }
2516
2517 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2518
2519 /*
2520 * In TSC catch-up mode the guest's TSC is emulated at a rate the
2521 * host cannot provide directly (e.g. the requested frequency is
2522 * higher than the host's, or the host TSC is unstable), so the
2523 * offset may have to be advanced to cover wall-clock time that
2524 * has already elapsed.  compute_guest_tsc() gives the TSC value
2525 * the guest should see at "kernel_ns"; if the value derived from
2526 * the host TSC lags behind it, bump the offset so the guest TSC
2527 * never appears to stall or go backwards.
2528 */
2529 if (vcpu->tsc_catchup) {
2530 u64 tsc = compute_guest_tsc(v, kernel_ns);
2531 if (tsc > tsc_timestamp) {
2532 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2533 tsc_timestamp = tsc;
2534 }
2535 }
2536
2537 local_irq_restore(flags);
2538
2539
2540
2541 if (kvm_has_tsc_control)
2542 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
2543
2544 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2545 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2546 &vcpu->hv_clock.tsc_shift,
2547 &vcpu->hv_clock.tsc_to_system_mul);
2548 vcpu->hw_tsc_khz = tgt_tsc_khz;
2549 }
2550
2551 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2552 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2553 vcpu->last_guest_tsc = tsc_timestamp;
2554
2555
2556 pvclock_flags = 0;
2557 if (use_master_clock)
2558 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2559
2560 vcpu->hv_clock.flags = pvclock_flags;
2561
2562 if (vcpu->pv_time_enabled)
2563 kvm_setup_pvclock_page(v);
2564 if (v == kvm_get_vcpu(v->kvm, 0))
2565 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2566 return 0;
2567}
2568
2569/*
2570 * kvmclock updates which are isolated to a given vcpu, such as
2571 * vcpu->cpu migration, should not allow system_timestamp from
2572 * the rest of the vcpus to remain static. Otherwise ntp frequency
2573 * correction applies to one vcpu's system_timestamp but not
2574 * the others.
2575 *
2576 * So in those cases, request a kvmclock update for all vcpus.
2577 *
2578 * We need to rate-limit these requests though, as they can
2579 * considerably slow guests that have a large number of vcpus.
2580 * The time for a remote vcpu to update its kvmclock is bound
2581 * by the delay we use to rate-limit the updates.
2582 */
2583#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2584
2585static void kvmclock_update_fn(struct work_struct *work)
2586{
2587 int i;
2588 struct delayed_work *dwork = to_delayed_work(work);
2589 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2590 kvmclock_update_work);
2591 struct kvm *kvm = container_of(ka, struct kvm, arch);
2592 struct kvm_vcpu *vcpu;
2593
2594 kvm_for_each_vcpu(i, vcpu, kvm) {
2595 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2596 kvm_vcpu_kick(vcpu);
2597 }
2598}
2599
2600static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
2601{
2602 struct kvm *kvm = v->kvm;
2603
2604 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2605 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
2606 KVMCLOCK_UPDATE_DELAY);
2607}
2608
2609#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
2610
2611static void kvmclock_sync_fn(struct work_struct *work)
2612{
2613 struct delayed_work *dwork = to_delayed_work(work);
2614 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2615 kvmclock_sync_work);
2616 struct kvm *kvm = container_of(ka, struct kvm, arch);
2617
2618 if (!kvmclock_periodic_sync)
2619 return;
2620
2621 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2622 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2623 KVMCLOCK_SYNC_PERIOD);
2624}
2625
2626/* Writes of non-zero values to MCi_STATUS from the guest are only
2627 * allowed when AMD's (or Hygon's) HWCR[McStatusWrEn], bit 18, is set;
2628 * Intel never allows them. */
2629static bool can_set_mci_status(struct kvm_vcpu *vcpu)
2630{
2631
2632 if (guest_cpuid_is_amd_or_hygon(vcpu))
2633 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
2634
2635 return false;
2636}
2637
2638static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2639{
2640 u64 mcg_cap = vcpu->arch.mcg_cap;
2641 unsigned bank_num = mcg_cap & 0xff;
2642 u32 msr = msr_info->index;
2643 u64 data = msr_info->data;
2644
2645 switch (msr) {
2646 case MSR_IA32_MCG_STATUS:
2647 vcpu->arch.mcg_status = data;
2648 break;
2649 case MSR_IA32_MCG_CTL:
2650 if (!(mcg_cap & MCG_CTL_P) &&
2651 (data || !msr_info->host_initiated))
2652 return 1;
2653 if (data != 0 && data != ~(u64)0)
2654 return 1;
2655 vcpu->arch.mcg_ctl = data;
2656 break;
2657 default:
2658 if (msr >= MSR_IA32_MC0_CTL &&
2659 msr < MSR_IA32_MCx_CTL(bank_num)) {
2660 u32 offset = array_index_nospec(
2661 msr - MSR_IA32_MC0_CTL,
2662 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
2663
2664 /* Only 0 or all 1s can be written to IA32_MCi_CTL.
2665 * Some Linux kernels though clear bit 10 in bank 4 to
2666 * work around a BIOS/GART TLB issue on AMD K8s; ignore
2667 * this to avoid an uncaught #GP in the guest.
2668 */
2669 if ((offset & 0x3) == 0 &&
2670 data != 0 && (data | (1 << 10)) != ~(u64)0)
2671 return -1;
2672
2673
2674 if (!msr_info->host_initiated &&
2675 (offset & 0x3) == 1 && data != 0) {
2676 if (!can_set_mci_status(vcpu))
2677 return -1;
2678 }
2679
2680 vcpu->arch.mce_banks[offset] = data;
2681 break;
2682 }
2683 return 1;
2684 }
2685 return 0;
2686}
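/*
 * Layout reminder for the bank-indexed MSRs handled above: architectural
 * MCA banks expose four registers each, starting at MSR_IA32_MC0_CTL, so
 * for the computed 'offset':
 *
 *	offset & 3 == 0  ->  IA32_MCi_CTL     (only 0 or all-1s accepted)
 *	offset & 3 == 1  ->  IA32_MCi_STATUS  (guest-writable only via McStatusWrEn)
 *	offset & 3 == 2  ->  IA32_MCi_ADDR
 *	offset & 3 == 3  ->  IA32_MCi_MISC
 */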
2687
2688static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2689{
2690 struct kvm *kvm = vcpu->kvm;
2691 int lm = is_long_mode(vcpu);
2692 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2693 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2694 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2695 : kvm->arch.xen_hvm_config.blob_size_32;
2696 u32 page_num = data & ~PAGE_MASK;
2697 u64 page_addr = data & PAGE_MASK;
2698 u8 *page;
2699 int r;
2700
2701 r = -E2BIG;
2702 if (page_num >= blob_size)
2703 goto out;
2704 r = -ENOMEM;
2705 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2706 if (IS_ERR(page)) {
2707 r = PTR_ERR(page);
2708 goto out;
2709 }
2710 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
2711 goto out_free;
2712 r = 0;
2713out_free:
2714 kfree(page);
2715out:
2716 return r;
2717}
2718
2719static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
2720{
2721 u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
2722
2723 return (vcpu->arch.apf.msr_en_val & mask) == mask;
2724}
2725
2726static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2727{
2728 gpa_t gpa = data & ~0x3f;
2729
2730 /* Bits 4:5 are reserved, should be zero */
2731 if (data & 0x30)
2732 return 1;
2733
2734 if (!lapic_in_kernel(vcpu))
2735 return data ? 1 : 0;
2736
2737 vcpu->arch.apf.msr_en_val = data;
2738
2739 if (!kvm_pv_async_pf_enabled(vcpu)) {
2740 kvm_clear_async_pf_completion_queue(vcpu);
2741 kvm_async_pf_hash_reset(vcpu);
2742 return 0;
2743 }
2744
2745 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2746 sizeof(u64)))
2747 return 1;
2748
2749 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2750 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2751
2752 kvm_async_pf_wakeup_all(vcpu);
2753
2754 return 0;
2755}
2756
2757static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
2758{
2759 /* Bits 8-63 are reserved */
2760 if (data >> 8)
2761 return 1;
2762
2763 if (!lapic_in_kernel(vcpu))
2764 return 1;
2765
2766 vcpu->arch.apf.msr_int_val = data;
2767
2768 vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK;
2769
2770 return 0;
2771}
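/*
 * Guest-side view of the two MSRs handled above (sketch of the async-PF
 * PV ABI, not code from this file): the guest programs the notification
 * vector first, then enables the mechanism with the physical address of
 * its per-CPU "apf_reason" area plus the desired mode bits, e.g.
 *
 *	wrmsrl(MSR_KVM_ASYNC_PF_INT, notification_vector);
 *	wrmsrl(MSR_KVM_ASYNC_PF_EN, __pa(apf_reason_area) |
 *				    KVM_ASYNC_PF_ENABLED |
 *				    KVM_ASYNC_PF_DELIVERY_AS_INT);
 *
 * 'notification_vector' and 'apf_reason_area' are placeholder names here.
 */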
2772
2773static void kvmclock_reset(struct kvm_vcpu *vcpu)
2774{
2775 vcpu->arch.pv_time_enabled = false;
2776 vcpu->arch.time = 0;
2777}
2778
2779static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
2780{
2781 ++vcpu->stat.tlb_flush;
2782 kvm_x86_ops.tlb_flush_all(vcpu);
2783}
2784
2785static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
2786{
2787 ++vcpu->stat.tlb_flush;
2788 kvm_x86_ops.tlb_flush_guest(vcpu);
2789}
2790
2791static void record_steal_time(struct kvm_vcpu *vcpu)
2792{
2793 struct kvm_host_map map;
2794 struct kvm_steal_time *st;
2795
2796 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2797 return;
2798
2799
2800 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
2801 &map, &vcpu->arch.st.cache, false))
2802 return;
2803
2804 st = map.hva +
2805 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
2806
2807 /*
2808 * Doing a TLB flush here, on the guest's behalf, can avoid
2809 * expensive IPIs.
2810 */
2811 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
2812 st->preempted & KVM_VCPU_FLUSH_TLB);
2813 if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
2814 kvm_vcpu_flush_tlb_guest(vcpu);
2815
2816 vcpu->arch.st.preempted = 0;
2817
2818 if (st->version & 1)
2819 st->version += 1;
2820
2821 st->version += 1;
2822
2823 smp_wmb();
2824
2825 st->steal += current->sched_info.run_delay -
2826 vcpu->arch.st.last_steal;
2827 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2828
2829 smp_wmb();
2830
2831 st->version += 1;
2832
2833 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
2834}
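/*
 * The guest consumes the structure updated above with the same odd/even
 * version protocol (sketch of the guest's steal-clock read, not code
 * from this file; 'st' is the guest's mapping of its steal-time area):
 *
 *	do {
 *		version = st->version;
 *		virt_rmb();
 *		steal = st->steal;
 *		virt_rmb();
 *	} while ((version & 1) || version != st->version);
 *
 * which is why st->version is bumped to an odd value before the fields
 * are modified and back to an even value afterwards.
 */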
2835
2836int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2837{
2838 bool pr = false;
2839 u32 msr = msr_info->index;
2840 u64 data = msr_info->data;
2841
2842 switch (msr) {
2843 case MSR_AMD64_NB_CFG:
2844 case MSR_IA32_UCODE_WRITE:
2845 case MSR_VM_HSAVE_PA:
2846 case MSR_AMD64_PATCH_LOADER:
2847 case MSR_AMD64_BU_CFG2:
2848 case MSR_AMD64_DC_CFG:
2849 case MSR_F15H_EX_CFG:
2850 break;
2851
2852 case MSR_IA32_UCODE_REV:
2853 if (msr_info->host_initiated)
2854 vcpu->arch.microcode_version = data;
2855 break;
2856 case MSR_IA32_ARCH_CAPABILITIES:
2857 if (!msr_info->host_initiated)
2858 return 1;
2859 vcpu->arch.arch_capabilities = data;
2860 break;
2861 case MSR_IA32_PERF_CAPABILITIES: {
2862 struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
2863
2864 if (!msr_info->host_initiated)
2865 return 1;
2866 if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
2867 return 1;
2868 if (data & ~msr_ent.data)
2869 return 1;
2870
2871 vcpu->arch.perf_capabilities = data;
2872
2873 return 0;
2874 }
2875 case MSR_EFER:
2876 return set_efer(vcpu, msr_info);
2877 case MSR_K7_HWCR:
2878 data &= ~(u64)0x40; /* ignore flush filter disable */
2879 data &= ~(u64)0x100; /* ignore ignne emulation enable */
2880 data &= ~(u64)0x8; /* ignore TLB cache disable */
2881
2882 /* Handle McStatusWrEn */
2883 if (data == BIT_ULL(18)) {
2884 vcpu->arch.msr_hwcr = data;
2885 } else if (data != 0) {
2886 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2887 data);
2888 return 1;
2889 }
2890 break;
2891 case MSR_FAM10H_MMIO_CONF_BASE:
2892 if (data != 0) {
2893 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2894 "0x%llx\n", data);
2895 return 1;
2896 }
2897 break;
2898 case MSR_IA32_DEBUGCTLMSR:
2899 if (!data) {
2900
2901 break;
2902 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2903
2904
2905 return 1;
2906 }
2907 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2908 __func__, data);
2909 break;
2910 case 0x200 ... 0x2ff:
2911 return kvm_mtrr_set_msr(vcpu, msr, data);
2912 case MSR_IA32_APICBASE:
2913 return kvm_set_apic_base(vcpu, msr_info);
2914 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
2915 return kvm_x2apic_msr_write(vcpu, msr, data);
2916 case MSR_IA32_TSCDEADLINE:
2917 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2918 break;
2919 case MSR_IA32_TSC_ADJUST:
2920 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
2921 if (!msr_info->host_initiated) {
2922 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2923 adjust_tsc_offset_guest(vcpu, adj);
2924 }
2925 vcpu->arch.ia32_tsc_adjust_msr = data;
2926 }
2927 break;
2928 case MSR_IA32_MISC_ENABLE:
2929 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
2930 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
2931 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
2932 return 1;
2933 vcpu->arch.ia32_misc_enable_msr = data;
2934 kvm_update_cpuid_runtime(vcpu);
2935 } else {
2936 vcpu->arch.ia32_misc_enable_msr = data;
2937 }
2938 break;
2939 case MSR_IA32_SMBASE:
2940 if (!msr_info->host_initiated)
2941 return 1;
2942 vcpu->arch.smbase = data;
2943 break;
2944 case MSR_IA32_POWER_CTL:
2945 vcpu->arch.msr_ia32_power_ctl = data;
2946 break;
2947 case MSR_IA32_TSC:
2948 kvm_write_tsc(vcpu, msr_info);
2949 break;
2950 case MSR_IA32_XSS:
2951 if (!msr_info->host_initiated &&
2952 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
2953 return 1;
2954 /*
2955 * KVM supports exposing PT to the guest, but does not support
2956 * IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
2957 * XSAVES/XRSTORS to save/restore PT MSRs.
2958 */
2959 if (data & ~supported_xss)
2960 return 1;
2961 vcpu->arch.ia32_xss = data;
2962 break;
2963 case MSR_SMI_COUNT:
2964 if (!msr_info->host_initiated)
2965 return 1;
2966 vcpu->arch.smi_count = data;
2967 break;
2968 case MSR_KVM_WALL_CLOCK_NEW:
2969 case MSR_KVM_WALL_CLOCK:
2970 vcpu->kvm->arch.wall_clock = data;
2971 kvm_write_wall_clock(vcpu->kvm, data);
2972 break;
2973 case MSR_KVM_SYSTEM_TIME_NEW:
2974 case MSR_KVM_SYSTEM_TIME: {
2975 struct kvm_arch *ka = &vcpu->kvm->arch;
2976
2977 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2978 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2979
2980 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2981 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2982
2983 ka->boot_vcpu_runs_old_kvmclock = tmp;
2984 }
2985
2986 vcpu->arch.time = data;
2987 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2988
2989 /* Bit 0 is the enable bit; disable kvmclock when it is clear */
2990 vcpu->arch.pv_time_enabled = false;
2991 if (!(data & 1))
2992 break;
2993
2994 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2995 &vcpu->arch.pv_time, data & ~1ULL,
2996 sizeof(struct pvclock_vcpu_time_info)))
2997 vcpu->arch.pv_time_enabled = true;
2998
2999 break;
3000 }
3001 case MSR_KVM_ASYNC_PF_EN:
3002 if (kvm_pv_enable_async_pf(vcpu, data))
3003 return 1;
3004 break;
3005 case MSR_KVM_ASYNC_PF_INT:
3006 if (kvm_pv_enable_async_pf_int(vcpu, data))
3007 return 1;
3008 break;
3009 case MSR_KVM_ASYNC_PF_ACK:
3010 if (data & 0x1) {
3011 vcpu->arch.apf.pageready_pending = false;
3012 kvm_check_async_pf_completion(vcpu);
3013 }
3014 break;
3015 case MSR_KVM_STEAL_TIME:
3016
3017 if (unlikely(!sched_info_on()))
3018 return 1;
3019
3020 if (data & KVM_STEAL_RESERVED_MASK)
3021 return 1;
3022
3023 vcpu->arch.st.msr_val = data;
3024
3025 if (!(data & KVM_MSR_ENABLED))
3026 break;
3027
3028 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3029
3030 break;
3031 case MSR_KVM_PV_EOI_EN:
3032 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
3033 return 1;
3034 break;
3035
3036 case MSR_KVM_POLL_CONTROL:
3037 /* only the enable bit is supported */
3038 if (data & (-1ULL << 1))
3039 return 1;
3040
3041 vcpu->arch.msr_kvm_poll_control = data;
3042 break;
3043
3044 case MSR_IA32_MCG_CTL:
3045 case MSR_IA32_MCG_STATUS:
3046 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3047 return set_msr_mce(vcpu, msr_info);
3048
3049 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3050 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3051 pr = true;
3052 fallthrough;
3053 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3054 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3055 if (kvm_pmu_is_valid_msr(vcpu, msr))
3056 return kvm_pmu_set_msr(vcpu, msr_info);
3057
3058 if (pr || data != 0)
3059 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
3060 "0x%x data 0x%llx\n", msr, data);
3061 break;
3062 case MSR_K7_CLK_CTL:
3063 /*
3064 * Ignore all writes to this no longer documented MSR.
3065 * Writes are only relevant for old K7 processors,
3066 * all pre-dating SVM, but a recommended workaround from
3067 * AMD for these chips. It is possible to specify the
3068 * affected processor models on the command line, hence
3069 * the need to ignore the workaround.
3070 */
3071 break;
3072 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3073 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3074 case HV_X64_MSR_SYNDBG_OPTIONS:
3075 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3076 case HV_X64_MSR_CRASH_CTL:
3077 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3078 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3079 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3080 case HV_X64_MSR_TSC_EMULATION_STATUS:
3081 return kvm_hv_set_msr_common(vcpu, msr, data,
3082 msr_info->host_initiated);
3083 case MSR_IA32_BBL_CR_CTL3:
3084 /* Drop writes to this legacy MSR -- see the rdmsr
3085 * counterpart for further detail.
3086 */
3087 if (report_ignored_msrs)
3088 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
3089 msr, data);
3090 break;
3091 case MSR_AMD64_OSVW_ID_LENGTH:
3092 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3093 return 1;
3094 vcpu->arch.osvw.length = data;
3095 break;
3096 case MSR_AMD64_OSVW_STATUS:
3097 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3098 return 1;
3099 vcpu->arch.osvw.status = data;
3100 break;
3101 case MSR_PLATFORM_INFO:
3102 if (!msr_info->host_initiated ||
3103 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
3104 cpuid_fault_enabled(vcpu)))
3105 return 1;
3106 vcpu->arch.msr_platform_info = data;
3107 break;
3108 case MSR_MISC_FEATURES_ENABLES:
3109 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
3110 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3111 !supports_cpuid_fault(vcpu)))
3112 return 1;
3113 vcpu->arch.msr_misc_features_enables = data;
3114 break;
3115 default:
3116 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
3117 return xen_hvm_config(vcpu, data);
3118 if (kvm_pmu_is_valid_msr(vcpu, msr))
3119 return kvm_pmu_set_msr(vcpu, msr_info);
3120 return KVM_MSR_RET_INVALID;
3121 }
3122 return 0;
3123}
3124EXPORT_SYMBOL_GPL(kvm_set_msr_common);
3125
3126static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
3127{
3128 u64 data;
3129 u64 mcg_cap = vcpu->arch.mcg_cap;
3130 unsigned bank_num = mcg_cap & 0xff;
3131
3132 switch (msr) {
3133 case MSR_IA32_P5_MC_ADDR:
3134 case MSR_IA32_P5_MC_TYPE:
3135 data = 0;
3136 break;
3137 case MSR_IA32_MCG_CAP:
3138 data = vcpu->arch.mcg_cap;
3139 break;
3140 case MSR_IA32_MCG_CTL:
3141 if (!(mcg_cap & MCG_CTL_P) && !host)
3142 return 1;
3143 data = vcpu->arch.mcg_ctl;
3144 break;
3145 case MSR_IA32_MCG_STATUS:
3146 data = vcpu->arch.mcg_status;
3147 break;
3148 default:
3149 if (msr >= MSR_IA32_MC0_CTL &&
3150 msr < MSR_IA32_MCx_CTL(bank_num)) {
3151 u32 offset = array_index_nospec(
3152 msr - MSR_IA32_MC0_CTL,
3153 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
3154
3155 data = vcpu->arch.mce_banks[offset];
3156 break;
3157 }
3158 return 1;
3159 }
3160 *pdata = data;
3161 return 0;
3162}
3163
3164int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3165{
3166 switch (msr_info->index) {
3167 case MSR_IA32_PLATFORM_ID:
3168 case MSR_IA32_EBL_CR_POWERON:
3169 case MSR_IA32_DEBUGCTLMSR:
3170 case MSR_IA32_LASTBRANCHFROMIP:
3171 case MSR_IA32_LASTBRANCHTOIP:
3172 case MSR_IA32_LASTINTFROMIP:
3173 case MSR_IA32_LASTINTTOIP:
3174 case MSR_K8_SYSCFG:
3175 case MSR_K8_TSEG_ADDR:
3176 case MSR_K8_TSEG_MASK:
3177 case MSR_VM_HSAVE_PA:
3178 case MSR_K8_INT_PENDING_MSG:
3179 case MSR_AMD64_NB_CFG:
3180 case MSR_FAM10H_MMIO_CONF_BASE:
3181 case MSR_AMD64_BU_CFG2:
3182 case MSR_IA32_PERF_CTL:
3183 case MSR_AMD64_DC_CFG:
3184 case MSR_F15H_EX_CFG:
3185 /*
3186 * Intel Sandy Bridge CPUs must support the RAPL (running average power
3187 * limit) MSRs.  Just return 0 for these, as we do not want to report
3188 * the host's power consumption to the guest, and there is nothing
3189 * meaningful to emulate.
3190 */
3191 case MSR_RAPL_POWER_UNIT:
3192 case MSR_PP0_ENERGY_STATUS:
3193 case MSR_PP1_ENERGY_STATUS:
3194 case MSR_PKG_ENERGY_STATUS:
3195 case MSR_DRAM_ENERGY_STATUS:
3196 msr_info->data = 0;
3197 break;
3198 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3199 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3200 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3201 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3202 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3203 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3204 return kvm_pmu_get_msr(vcpu, msr_info);
3205 msr_info->data = 0;
3206 break;
3207 case MSR_IA32_UCODE_REV:
3208 msr_info->data = vcpu->arch.microcode_version;
3209 break;
3210 case MSR_IA32_ARCH_CAPABILITIES:
3211 if (!msr_info->host_initiated &&
3212 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3213 return 1;
3214 msr_info->data = vcpu->arch.arch_capabilities;
3215 break;
3216 case MSR_IA32_PERF_CAPABILITIES:
3217 if (!msr_info->host_initiated &&
3218 !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
3219 return 1;
3220 msr_info->data = vcpu->arch.perf_capabilities;
3221 break;
3222 case MSR_IA32_POWER_CTL:
3223 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3224 break;
3225 case MSR_IA32_TSC: {
3226 /*
3227 * Intel SDM states that MSR_IA32_TSC read adds the TSC offset
3228 * even when not intercepted. AMD manual doesn't explicitly
3229 * state this but appears to behave the same.
3230 *
3231 * On userspace reads and writes, however, we unconditionally
3232 * return L1's TSC value to ensure backwards-compatible
3233 * behavior for migration.
3234 */
3235 u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
3236 vcpu->arch.tsc_offset;
3237
3238 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
3239 break;
3240 }
3241 case MSR_MTRRcap:
3242 case 0x200 ... 0x2ff:
3243 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3244 case 0xcd:
3245 msr_info->data = 3;
3246 break;
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258 case MSR_EBC_FREQUENCY_ID:
3259 msr_info->data = 1 << 24;
3260 break;
3261 case MSR_IA32_APICBASE:
3262 msr_info->data = kvm_get_apic_base(vcpu);
3263 break;
3264 case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
3265 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3266 case MSR_IA32_TSCDEADLINE:
3267 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3268 break;
3269 case MSR_IA32_TSC_ADJUST:
3270 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3271 break;
3272 case MSR_IA32_MISC_ENABLE:
3273 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3274 break;
3275 case MSR_IA32_SMBASE:
3276 if (!msr_info->host_initiated)
3277 return 1;
3278 msr_info->data = vcpu->arch.smbase;
3279 break;
3280 case MSR_SMI_COUNT:
3281 msr_info->data = vcpu->arch.smi_count;
3282 break;
3283 case MSR_IA32_PERF_STATUS:
3284
3285 msr_info->data = 1000ULL;
3286
3287 msr_info->data |= (((uint64_t)4ULL) << 40);
3288 break;
3289 case MSR_EFER:
3290 msr_info->data = vcpu->arch.efer;
3291 break;
3292 case MSR_KVM_WALL_CLOCK:
3293 case MSR_KVM_WALL_CLOCK_NEW:
3294 msr_info->data = vcpu->kvm->arch.wall_clock;
3295 break;
3296 case MSR_KVM_SYSTEM_TIME:
3297 case MSR_KVM_SYSTEM_TIME_NEW:
3298 msr_info->data = vcpu->arch.time;
3299 break;
3300 case MSR_KVM_ASYNC_PF_EN:
3301 msr_info->data = vcpu->arch.apf.msr_en_val;
3302 break;
3303 case MSR_KVM_ASYNC_PF_INT:
3304 msr_info->data = vcpu->arch.apf.msr_int_val;
3305 break;
3306 case MSR_KVM_ASYNC_PF_ACK:
3307 msr_info->data = 0;
3308 break;
3309 case MSR_KVM_STEAL_TIME:
3310 msr_info->data = vcpu->arch.st.msr_val;
3311 break;
3312 case MSR_KVM_PV_EOI_EN:
3313 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3314 break;
3315 case MSR_KVM_POLL_CONTROL:
3316 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3317 break;
3318 case MSR_IA32_P5_MC_ADDR:
3319 case MSR_IA32_P5_MC_TYPE:
3320 case MSR_IA32_MCG_CAP:
3321 case MSR_IA32_MCG_CTL:
3322 case MSR_IA32_MCG_STATUS:
3323 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3324 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3325 msr_info->host_initiated);
3326 case MSR_IA32_XSS:
3327 if (!msr_info->host_initiated &&
3328 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3329 return 1;
3330 msr_info->data = vcpu->arch.ia32_xss;
3331 break;
3332 case MSR_K7_CLK_CTL:
3333 /*
3334 * Provide the expected ramp-up count for K7. All other
3335 * fields are left at zero, indicating minimum divisors for
3336 * every field.
3337 *
3338 * The net effect is that guest code which reads this legacy
3339 * K7 clock-control MSR sees a plausible non-zero value
3340 * instead of all zeroes.
3341 */
3342 msr_info->data = 0x20000000;
3343 break;
3344 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3345 case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
3346 case HV_X64_MSR_SYNDBG_OPTIONS:
3347 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3348 case HV_X64_MSR_CRASH_CTL:
3349 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3350 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3351 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3352 case HV_X64_MSR_TSC_EMULATION_STATUS:
3353 return kvm_hv_get_msr_common(vcpu,
3354 msr_info->index, &msr_info->data,
3355 msr_info->host_initiated);
3356 case MSR_IA32_BBL_CR_CTL3:
3357 /*
3358 * This legacy MSR exists but isn't fully documented in current
3359 * silicon.  It is however accessed by winxp in very narrow
3360 * scenarios where it sets bit #19, itself documented as
3361 * a "reserved" bit.  Best effort attempt to source coherent
3362 * data here.
3363 *
3364 * The constant returned below mimics what real hardware
3365 * reports, so such guests see a plausible value.
3366 */
3367 msr_info->data = 0xbe702111;
3368 break;
3369 case MSR_AMD64_OSVW_ID_LENGTH:
3370 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3371 return 1;
3372 msr_info->data = vcpu->arch.osvw.length;
3373 break;
3374 case MSR_AMD64_OSVW_STATUS:
3375 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3376 return 1;
3377 msr_info->data = vcpu->arch.osvw.status;
3378 break;
3379 case MSR_PLATFORM_INFO:
3380 if (!msr_info->host_initiated &&
3381 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3382 return 1;
3383 msr_info->data = vcpu->arch.msr_platform_info;
3384 break;
3385 case MSR_MISC_FEATURES_ENABLES:
3386 msr_info->data = vcpu->arch.msr_misc_features_enables;
3387 break;
3388 case MSR_K7_HWCR:
3389 msr_info->data = vcpu->arch.msr_hwcr;
3390 break;
3391 default:
3392 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3393 return kvm_pmu_get_msr(vcpu, msr_info);
3394 return KVM_MSR_RET_INVALID;
3395 }
3396 return 0;
3397}
3398EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3399
3400/*
3401 * Read or write a bunch of msrs. All parameters are kernel addresses.
3402 *
3403 * @return number of msrs set successfully.
3404 */
3405static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
3406 struct kvm_msr_entry *entries,
3407 int (*do_msr)(struct kvm_vcpu *vcpu,
3408 unsigned index, u64 *data))
3409{
3410 int i;
3411
3412 for (i = 0; i < msrs->nmsrs; ++i)
3413 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3414 break;
3415
3416 return i;
3417}
3418
3419/*
3420 * Read or write a bunch of msrs. Parameters are user addresses.
3421 *
3422 * @return number of msrs set successfully.
3423 */
3424static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
3425 int (*do_msr)(struct kvm_vcpu *vcpu,
3426 unsigned index, u64 *data),
3427 int writeback)
3428{
3429 struct kvm_msrs msrs;
3430 struct kvm_msr_entry *entries;
3431 int r, n;
3432 unsigned size;
3433
3434 r = -EFAULT;
3435 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3436 goto out;
3437
3438 r = -E2BIG;
3439 if (msrs.nmsrs >= MAX_IO_MSRS)
3440 goto out;
3441
3442 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3443 entries = memdup_user(user_msrs->entries, size);
3444 if (IS_ERR(entries)) {
3445 r = PTR_ERR(entries);
3446 goto out;
3447 }
3448
3449 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3450 if (r < 0)
3451 goto out_free;
3452
3453 r = -EFAULT;
3454 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3455 goto out_free;
3456
3457 r = n;
3458
3459out_free:
3460 kfree(entries);
3461out:
3462 return r;
3463}
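/*
 * Userspace view of msr_io() (illustrative only; error handling omitted
 * and 'vcpu_fd' is assumed to be an open vCPU file descriptor):
 *
 *	struct {
 *		struct kvm_msrs hdr;
 *		struct kvm_msr_entry entries[2];
 *	} m = {
 *		.hdr.nmsrs  = 2,
 *		.entries[0] = { .index = MSR_IA32_TSC },
 *		.entries[1] = { .index = MSR_EFER },
 *	};
 *	int n = ioctl(vcpu_fd, KVM_GET_MSRS, &m);
 *
 * On success n is the number of MSRs actually processed, matching the
 * "number of msrs set successfully" convention of __msr_io() above.
 */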
3464
3465static inline bool kvm_can_mwait_in_guest(void)
3466{
3467 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3468 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3469 boot_cpu_has(X86_FEATURE_ARAT);
3470}
3471
3472int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3473{
3474 int r = 0;
3475
3476 switch (ext) {
3477 case KVM_CAP_IRQCHIP:
3478 case KVM_CAP_HLT:
3479 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3480 case KVM_CAP_SET_TSS_ADDR:
3481 case KVM_CAP_EXT_CPUID:
3482 case KVM_CAP_EXT_EMUL_CPUID:
3483 case KVM_CAP_CLOCKSOURCE:
3484 case KVM_CAP_PIT:
3485 case KVM_CAP_NOP_IO_DELAY:
3486 case KVM_CAP_MP_STATE:
3487 case KVM_CAP_SYNC_MMU:
3488 case KVM_CAP_USER_NMI:
3489 case KVM_CAP_REINJECT_CONTROL:
3490 case KVM_CAP_IRQ_INJECT_STATUS:
3491 case KVM_CAP_IOEVENTFD:
3492 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3493 case KVM_CAP_PIT2:
3494 case KVM_CAP_PIT_STATE2:
3495 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3496 case KVM_CAP_XEN_HVM:
3497 case KVM_CAP_VCPU_EVENTS:
3498 case KVM_CAP_HYPERV:
3499 case KVM_CAP_HYPERV_VAPIC:
3500 case KVM_CAP_HYPERV_SPIN:
3501 case KVM_CAP_HYPERV_SYNIC:
3502 case KVM_CAP_HYPERV_SYNIC2:
3503 case KVM_CAP_HYPERV_VP_INDEX:
3504 case KVM_CAP_HYPERV_EVENTFD:
3505 case KVM_CAP_HYPERV_TLBFLUSH:
3506 case KVM_CAP_HYPERV_SEND_IPI:
3507 case KVM_CAP_HYPERV_CPUID:
3508 case KVM_CAP_PCI_SEGMENT:
3509 case KVM_CAP_DEBUGREGS:
3510 case KVM_CAP_X86_ROBUST_SINGLESTEP:
3511 case KVM_CAP_XSAVE:
3512 case KVM_CAP_ASYNC_PF:
3513 case KVM_CAP_ASYNC_PF_INT:
3514 case KVM_CAP_GET_TSC_KHZ:
3515 case KVM_CAP_KVMCLOCK_CTRL:
3516 case KVM_CAP_READONLY_MEM:
3517 case KVM_CAP_HYPERV_TIME:
3518 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
3519 case KVM_CAP_TSC_DEADLINE_TIMER:
3520 case KVM_CAP_DISABLE_QUIRKS:
3521 case KVM_CAP_SET_BOOT_CPU_ID:
3522 case KVM_CAP_SPLIT_IRQCHIP:
3523 case KVM_CAP_IMMEDIATE_EXIT:
3524 case KVM_CAP_PMU_EVENT_FILTER:
3525 case KVM_CAP_GET_MSR_FEATURES:
3526 case KVM_CAP_MSR_PLATFORM_INFO:
3527 case KVM_CAP_EXCEPTION_PAYLOAD:
3528 case KVM_CAP_SET_GUEST_DEBUG:
3529 case KVM_CAP_LAST_CPU:
3530 r = 1;
3531 break;
3532 case KVM_CAP_SYNC_REGS:
3533 r = KVM_SYNC_X86_VALID_FIELDS;
3534 break;
3535 case KVM_CAP_ADJUST_CLOCK:
3536 r = KVM_CLOCK_TSC_STABLE;
3537 break;
3538 case KVM_CAP_X86_DISABLE_EXITS:
3539 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
3540 KVM_X86_DISABLE_EXITS_CSTATE;
3541 if(kvm_can_mwait_in_guest())
3542 r |= KVM_X86_DISABLE_EXITS_MWAIT;
3543 break;
3544 case KVM_CAP_X86_SMM:
3545 /* SMBASE is usually relocated above 1M on modern chipsets,
3546 * and SMM handlers might indeed rely on 4G segment limits,
3547 * so do not report SMM to be available if real mode is
3548 * emulated via vm86 mode.  Still, do not go to great lengths
3549 * to avoid userspace's usage of the feature, because it is a
3550 * fringe case that is not enabled except via specific settings
3551 * of the module parameters.
3552 */
3553 r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
3554 break;
3555 case KVM_CAP_VAPIC:
3556 r = !kvm_x86_ops.cpu_has_accelerated_tpr();
3557 break;
3558 case KVM_CAP_NR_VCPUS:
3559 r = KVM_SOFT_MAX_VCPUS;
3560 break;
3561 case KVM_CAP_MAX_VCPUS:
3562 r = KVM_MAX_VCPUS;
3563 break;
3564 case KVM_CAP_MAX_VCPU_ID:
3565 r = KVM_MAX_VCPU_ID;
3566 break;
3567 case KVM_CAP_PV_MMU:
3568 r = 0;
3569 break;
3570 case KVM_CAP_MCE:
3571 r = KVM_MAX_MCE_BANKS;
3572 break;
3573 case KVM_CAP_XCRS:
3574 r = boot_cpu_has(X86_FEATURE_XSAVE);
3575 break;
3576 case KVM_CAP_TSC_CONTROL:
3577 r = kvm_has_tsc_control;
3578 break;
3579 case KVM_CAP_X2APIC_API:
3580 r = KVM_X2APIC_API_VALID_FLAGS;
3581 break;
3582 case KVM_CAP_NESTED_STATE:
3583 r = kvm_x86_ops.nested_ops->get_state ?
3584 kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
3585 break;
3586 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
3587 r = kvm_x86_ops.enable_direct_tlbflush != NULL;
3588 break;
3589 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
3590 r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
3591 break;
3592 case KVM_CAP_SMALLER_MAXPHYADDR:
3593 r = (int) allow_smaller_maxphyaddr;
3594 break;
3595 case KVM_CAP_STEAL_TIME:
3596 r = sched_info_on();
3597 break;
3598 default:
3599 break;
3600 }
3601 return r;
3602
3603}
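/*
 * The values computed above are consumed via KVM_CHECK_EXTENSION, issued
 * on the /dev/kvm fd or on a VM fd (sketch; error handling omitted):
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *	int nr_mce_banks = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
 *	int has_tsc_ctrl = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL);
 *
 * A return of 0 means the capability is absent; a positive value means
 * present and, for some capabilities, carries a payload (e.g. the number
 * of MCE banks for KVM_CAP_MCE or the valid flags for KVM_CAP_X2APIC_API).
 */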
3604
3605long kvm_arch_dev_ioctl(struct file *filp,
3606 unsigned int ioctl, unsigned long arg)
3607{
3608 void __user *argp = (void __user *)arg;
3609 long r;
3610
3611 switch (ioctl) {
3612 case KVM_GET_MSR_INDEX_LIST: {
3613 struct kvm_msr_list __user *user_msr_list = argp;
3614 struct kvm_msr_list msr_list;
3615 unsigned n;
3616
3617 r = -EFAULT;
3618 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3619 goto out;
3620 n = msr_list.nmsrs;
3621 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
3622 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3623 goto out;
3624 r = -E2BIG;
3625 if (n < msr_list.nmsrs)
3626 goto out;
3627 r = -EFAULT;
3628 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3629 num_msrs_to_save * sizeof(u32)))
3630 goto out;
3631 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
3632 &emulated_msrs,
3633 num_emulated_msrs * sizeof(u32)))
3634 goto out;
3635 r = 0;
3636 break;
3637 }
3638 case KVM_GET_SUPPORTED_CPUID:
3639 case KVM_GET_EMULATED_CPUID: {
3640 struct kvm_cpuid2 __user *cpuid_arg = argp;
3641 struct kvm_cpuid2 cpuid;
3642
3643 r = -EFAULT;
3644 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3645 goto out;
3646
3647 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
3648 ioctl);
3649 if (r)
3650 goto out;
3651
3652 r = -EFAULT;
3653 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3654 goto out;
3655 r = 0;
3656 break;
3657 }
3658 case KVM_X86_GET_MCE_CAP_SUPPORTED:
3659 r = -EFAULT;
3660 if (copy_to_user(argp, &kvm_mce_cap_supported,
3661 sizeof(kvm_mce_cap_supported)))
3662 goto out;
3663 r = 0;
3664 break;
3665 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
3666 struct kvm_msr_list __user *user_msr_list = argp;
3667 struct kvm_msr_list msr_list;
3668 unsigned int n;
3669
3670 r = -EFAULT;
3671 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3672 goto out;
3673 n = msr_list.nmsrs;
3674 msr_list.nmsrs = num_msr_based_features;
3675 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3676 goto out;
3677 r = -E2BIG;
3678 if (n < msr_list.nmsrs)
3679 goto out;
3680 r = -EFAULT;
3681 if (copy_to_user(user_msr_list->indices, &msr_based_features,
3682 num_msr_based_features * sizeof(u32)))
3683 goto out;
3684 r = 0;
3685 break;
3686 }
3687 case KVM_GET_MSRS:
3688 r = msr_io(NULL, argp, do_get_msr_feature, 1);
3689 break;
3690 default:
3691 r = -EINVAL;
3692 break;
3693 }
3694out:
3695 return r;
3696}
3697
3698static void wbinvd_ipi(void *garbage)
3699{
3700 wbinvd();
3701}
3702
3703static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
3704{
3705 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
3706}
3707
3708void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3709{
3710
3711 if (need_emulate_wbinvd(vcpu)) {
3712 if (kvm_x86_ops.has_wbinvd_exit())
3713 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
3714 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
3715 smp_call_function_single(vcpu->cpu,
3716 wbinvd_ipi, NULL, 1);
3717 }
3718
3719 kvm_x86_ops.vcpu_load(vcpu, cpu);
3720
3721
3722 vcpu->arch.host_pkru = read_pkru();
3723
3724
3725 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
3726 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
3727 vcpu->arch.tsc_offset_adjustment = 0;
3728 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3729 }
3730
3731 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
3732 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
3733 rdtsc() - vcpu->arch.last_host_tsc;
3734 if (tsc_delta < 0)
3735 mark_tsc_unstable("KVM discovered backwards TSC");
3736
3737 if (kvm_check_tsc_unstable()) {
3738 u64 offset = kvm_compute_tsc_offset(vcpu,
3739 vcpu->arch.last_guest_tsc);
3740 kvm_vcpu_write_tsc_offset(vcpu, offset);
3741 vcpu->arch.tsc_catchup = 1;
3742 }
3743
3744 if (kvm_lapic_hv_timer_in_use(vcpu))
3745 kvm_lapic_restart_hv_timer(vcpu);
3746
3747 /*
3748 * On a host with synchronized TSC, there is no need to update
3749 * the kvmclock on vcpu->cpu migration.
3750 */
3751 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
3752 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
3753 if (vcpu->cpu != cpu)
3754 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
3755 vcpu->cpu = cpu;
3756 }
3757
3758 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3759}
3760
3761static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
3762{
3763 struct kvm_host_map map;
3764 struct kvm_steal_time *st;
3765
3766 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3767 return;
3768
3769 if (vcpu->arch.st.preempted)
3770 return;
3771
3772 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
3773 &vcpu->arch.st.cache, true))
3774 return;
3775
3776 st = map.hva +
3777 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
3778
3779 st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
3780
3781 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
3782}
3783
3784void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3785{
3786 int idx;
3787
3788 if (vcpu->preempted)
3789 vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
3790
3791 /*
3792 * Disable page faults because we're in atomic context here.
3793 * The guest-memory write done by kvm_steal_time_set_preempted()
3794 * below could otherwise fault and sleep; with page faults
3795 * disabled the access simply fails.  NOTE: this means the
3796 * preempted hint may not reach the guest during postcopy live
3797 * migration or heavy host paging.
3798 */
3799 pagefault_disable();
3800
3801 /*
3802 * kvm_steal_time_set_preempted() accesses the memslots, so take the srcu read lock.
3803 */
3804 idx = srcu_read_lock(&vcpu->kvm->srcu);
3805 kvm_steal_time_set_preempted(vcpu);
3806 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3807 pagefault_enable();
3808 kvm_x86_ops.vcpu_put(vcpu);
3809 vcpu->arch.last_host_tsc = rdtsc();
3810
3811 /*
3812 * Clear DR6 so that a stale guest debug status is not left in the
3813 * host register while this vCPU is scheduled out.
3814 */
3815 set_debugreg(0, 6);
3816}
3817
3818static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
3819 struct kvm_lapic_state *s)
3820{
3821 if (vcpu->arch.apicv_active)
3822 kvm_x86_ops.sync_pir_to_irr(vcpu);
3823
3824 return kvm_apic_get_state(vcpu, s);
3825}
3826
3827static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
3828 struct kvm_lapic_state *s)
3829{
3830 int r;
3831
3832 r = kvm_apic_set_state(vcpu, s);
3833 if (r)
3834 return r;
3835 update_cr8_intercept(vcpu);
3836
3837 return 0;
3838}
3839
3840static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
3841{
3842 return (!lapic_in_kernel(vcpu) ||
3843 kvm_apic_accept_pic_intr(vcpu));
3844}
3845
3846/*
3847 * If userspace requested an interrupt window, check that the
3848 * interrupt window is open.
3849 *
3850 * No need to exit to userspace if we already have an interrupt queued.
3851 */
3852static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
3853{
3854 return kvm_arch_interrupt_allowed(vcpu) &&
3855 !kvm_cpu_has_interrupt(vcpu) &&
3856 !kvm_event_needs_reinjection(vcpu) &&
3857 kvm_cpu_accept_dm_intr(vcpu);
3858}
3859
3860static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
3861 struct kvm_interrupt *irq)
3862{
3863 if (irq->irq >= KVM_NR_INTERRUPTS)
3864 return -EINVAL;
3865
3866 if (!irqchip_in_kernel(vcpu->kvm)) {
3867 kvm_queue_interrupt(vcpu, irq->irq, false);
3868 kvm_make_request(KVM_REQ_EVENT, vcpu);
3869 return 0;
3870 }
3871
3872 /*
3873 * With the in-kernel LAPIC, KVM_INTERRUPT is only used to inject
3874 * ExtINT, so fail if a full in-kernel PIC (8259) is present.
3875 */
3876 if (pic_in_kernel(vcpu->kvm))
3877 return -ENXIO;
3878
3879 if (vcpu->arch.pending_external_vector != -1)
3880 return -EEXIST;
3881
3882 vcpu->arch.pending_external_vector = irq->irq;
3883 kvm_make_request(KVM_REQ_EVENT, vcpu);
3884 return 0;
3885}
3886
3887static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
3888{
3889 kvm_inject_nmi(vcpu);
3890
3891 return 0;
3892}
3893
3894static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
3895{
3896 kvm_make_request(KVM_REQ_SMI, vcpu);
3897
3898 return 0;
3899}
3900
3901static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
3902 struct kvm_tpr_access_ctl *tac)
3903{
3904 if (tac->flags)
3905 return -EINVAL;
3906 vcpu->arch.tpr_access_reporting = !!tac->enabled;
3907 return 0;
3908}
3909
3910static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
3911 u64 mcg_cap)
3912{
3913 int r;
3914 unsigned bank_num = mcg_cap & 0xff, bank;
3915
3916 r = -EINVAL;
3917 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
3918 goto out;
3919 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
3920 goto out;
3921 r = 0;
3922 vcpu->arch.mcg_cap = mcg_cap;
3923
3924 if (mcg_cap & MCG_CTL_P)
3925 vcpu->arch.mcg_ctl = ~(u64)0;
3926
3927 for (bank = 0; bank < bank_num; bank++)
3928 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
3929
3930 kvm_x86_ops.setup_mce(vcpu);
3931out:
3932 return r;
3933}
3934
3935static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
3936 struct kvm_x86_mce *mce)
3937{
3938 u64 mcg_cap = vcpu->arch.mcg_cap;
3939 unsigned bank_num = mcg_cap & 0xff;
3940 u64 *banks = vcpu->arch.mce_banks;
3941
3942 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
3943 return -EINVAL;
3944
3945 /*
3946 * If IA32_MCG_CTL is not all 1s, uncorrected error reporting is disabled.
3947 */
3948 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
3949 vcpu->arch.mcg_ctl != ~(u64)0)
3950 return 0;
3951 banks += 4 * mce->bank;
3952
3953 /*
3954 * If IA32_MCi_CTL is not all 1s, uncorrected error reporting is disabled for the bank.
3955 */
3956 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
3957 return 0;
3958 if (mce->status & MCI_STATUS_UC) {
3959 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3960 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3961 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3962 return 0;
3963 }
3964 if (banks[1] & MCI_STATUS_VAL)
3965 mce->status |= MCI_STATUS_OVER;
3966 banks[2] = mce->addr;
3967 banks[3] = mce->misc;
3968 vcpu->arch.mcg_status = mce->mcg_status;
3969 banks[1] = mce->status;
3970 kvm_queue_exception(vcpu, MC_VECTOR);
3971 } else if (!(banks[1] & MCI_STATUS_VAL)
3972 || !(banks[1] & MCI_STATUS_UC)) {
3973 if (banks[1] & MCI_STATUS_VAL)
3974 mce->status |= MCI_STATUS_OVER;
3975 banks[2] = mce->addr;
3976 banks[3] = mce->misc;
3977 banks[1] = mce->status;
3978 } else
3979 banks[1] |= MCI_STATUS_OVER;
3980 return 0;
3981}
3982
3983static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3984 struct kvm_vcpu_events *events)
3985{
3986 process_nmi(vcpu);
3987
3988 /*
3989 * In guest mode, payload delivery should be deferred, so that
3990 * the L1 hypervisor can intercept #PF before CR2 is modified
3991 * (or intercept #DB before DR6 is modified under nVMX).
3992 *
3993 * The legacy KVM_GET_VCPU_EVENTS ABI has no room for the payload,
3994 * so unless the per-VM KVM_CAP_EXCEPTION_PAYLOAD capability is
3995 * enabled, deliver the payload here (i.e. update CR2 or DR6)
3996 * before the exception state is copied out to userspace, matching
3997 * what the guest would eventually observe.
3998 */
3999 if (!vcpu->kvm->arch.exception_payload_enabled &&
4000 vcpu->arch.exception.pending && vcpu->arch.exception.has_payload)
4001 kvm_deliver_exception_payload(vcpu);
4002
4003 /*
4004 * The API doesn't provide the instruction length for software
4005 * exceptions, so don't report them. As long as the guest RIP
4006 * isn't advanced, we should expect to encounter the exception
4007 * again.
4008 */
4009 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
4010 events->exception.injected = 0;
4011 events->exception.pending = 0;
4012 } else {
4013 events->exception.injected = vcpu->arch.exception.injected;
4014 events->exception.pending = vcpu->arch.exception.pending;
4015
4016 /*
4017 * For ABI compatibility, deliberately conflate pending and
4018 * injected exceptions when KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
4019 */
4020 if (!vcpu->kvm->arch.exception_payload_enabled)
4021 events->exception.injected |=
4022 vcpu->arch.exception.pending;
4023 }
4024 events->exception.nr = vcpu->arch.exception.nr;
4025 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
4026 events->exception.error_code = vcpu->arch.exception.error_code;
4027 events->exception_has_payload = vcpu->arch.exception.has_payload;
4028 events->exception_payload = vcpu->arch.exception.payload;
4029
4030 events->interrupt.injected =
4031 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
4032 events->interrupt.nr = vcpu->arch.interrupt.nr;
4033 events->interrupt.soft = 0;
4034 events->interrupt.shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
4035
4036 events->nmi.injected = vcpu->arch.nmi_injected;
4037 events->nmi.pending = vcpu->arch.nmi_pending != 0;
4038 events->nmi.masked = kvm_x86_ops.get_nmi_mask(vcpu);
4039 events->nmi.pad = 0;
4040
4041 events->sipi_vector = 0;
4042
4043 events->smi.smm = is_smm(vcpu);
4044 events->smi.pending = vcpu->arch.smi_pending;
4045 events->smi.smm_inside_nmi =
4046 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
4047 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
4048
4049 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
4050 | KVM_VCPUEVENT_VALID_SHADOW
4051 | KVM_VCPUEVENT_VALID_SMM);
4052 if (vcpu->kvm->arch.exception_payload_enabled)
4053 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
4054
4055 memset(&events->reserved, 0, sizeof(events->reserved));
4056}
4057
4058static void kvm_smm_changed(struct kvm_vcpu *vcpu);
4059
4060static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
4061 struct kvm_vcpu_events *events)
4062{
4063 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
4064 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
4065 | KVM_VCPUEVENT_VALID_SHADOW
4066 | KVM_VCPUEVENT_VALID_SMM
4067 | KVM_VCPUEVENT_VALID_PAYLOAD))
4068 return -EINVAL;
4069
4070 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
4071 if (!vcpu->kvm->arch.exception_payload_enabled)
4072 return -EINVAL;
4073 if (events->exception.pending)
4074 events->exception.injected = 0;
4075 else
4076 events->exception_has_payload = 0;
4077 } else {
4078 events->exception.pending = 0;
4079 events->exception_has_payload = 0;
4080 }
4081
4082 if ((events->exception.injected || events->exception.pending) &&
4083 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
4084 return -EINVAL;
4085
4086 /* INITs are latched while in SMM */
4087 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
4088 (events->smi.smm || events->smi.pending) &&
4089 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
4090 return -EINVAL;
4091
4092 process_nmi(vcpu);
4093 vcpu->arch.exception.injected = events->exception.injected;
4094 vcpu->arch.exception.pending = events->exception.pending;
4095 vcpu->arch.exception.nr = events->exception.nr;
4096 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
4097 vcpu->arch.exception.error_code = events->exception.error_code;
4098 vcpu->arch.exception.has_payload = events->exception_has_payload;
4099 vcpu->arch.exception.payload = events->exception_payload;
4100
4101 vcpu->arch.interrupt.injected = events->interrupt.injected;
4102 vcpu->arch.interrupt.nr = events->interrupt.nr;
4103 vcpu->arch.interrupt.soft = events->interrupt.soft;
4104 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
4105 kvm_x86_ops.set_interrupt_shadow(vcpu,
4106 events->interrupt.shadow);
4107
4108 vcpu->arch.nmi_injected = events->nmi.injected;
4109 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
4110 vcpu->arch.nmi_pending = events->nmi.pending;
4111 kvm_x86_ops.set_nmi_mask(vcpu, events->nmi.masked);
4112
4113 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
4114 lapic_in_kernel(vcpu))
4115 vcpu->arch.apic->sipi_vector = events->sipi_vector;
4116
4117 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
4118 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
4119 if (events->smi.smm)
4120 vcpu->arch.hflags |= HF_SMM_MASK;
4121 else
4122 vcpu->arch.hflags &= ~HF_SMM_MASK;
4123 kvm_smm_changed(vcpu);
4124 }
4125
4126 vcpu->arch.smi_pending = events->smi.pending;
4127
4128 if (events->smi.smm) {
4129 if (events->smi.smm_inside_nmi)
4130 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
4131 else
4132 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
4133 }
4134
4135 if (lapic_in_kernel(vcpu)) {
4136 if (events->smi.latched_init)
4137 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4138 else
4139 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
4140 }
4141 }
4142
4143 kvm_make_request(KVM_REQ_EVENT, vcpu);
4144
4145 return 0;
4146}
4147
4148static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
4149 struct kvm_debugregs *dbgregs)
4150{
4151 unsigned long val;
4152
4153 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
4154 kvm_get_dr(vcpu, 6, &val);
4155 dbgregs->dr6 = val;
4156 dbgregs->dr7 = vcpu->arch.dr7;
4157 dbgregs->flags = 0;
4158 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
4159}
4160
4161static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
4162 struct kvm_debugregs *dbgregs)
4163{
4164 if (dbgregs->flags)
4165 return -EINVAL;
4166
4167 if (dbgregs->dr6 & ~0xffffffffull)
4168 return -EINVAL;
4169 if (dbgregs->dr7 & ~0xffffffffull)
4170 return -EINVAL;
4171
4172 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
4173 kvm_update_dr0123(vcpu);
4174 vcpu->arch.dr6 = dbgregs->dr6;
4175 vcpu->arch.dr7 = dbgregs->dr7;
4176 kvm_update_dr7(vcpu);
4177
4178 return 0;
4179}
4180
4181#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
4182
4183static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
4184{
4185 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
4186 u64 xstate_bv = xsave->header.xfeatures;
4187 u64 valid;
4188
4189 /*
4190 * Copy legacy XSAVE area, to avoid complications with CPUID
4191 * leaves 0 and 1 in the loop below.
4192 */
4193 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
4194
4195
4196 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
4197 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
4198
4199 /*
4200 * Copy each region from the possibly compacted offset to the
4201 * non-compacted offset.
4202 */
4203 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4204 while (valid) {
4205 u64 xfeature_mask = valid & -valid;
4206 int xfeature_nr = fls64(xfeature_mask) - 1;
4207 void *src = get_xsave_addr(xsave, xfeature_nr);
4208
4209 if (src) {
4210 u32 size, offset, ecx, edx;
4211 cpuid_count(XSTATE_CPUID, xfeature_nr,
4212 &size, &offset, &ecx, &edx);
4213 if (xfeature_nr == XFEATURE_PKRU)
4214 memcpy(dest + offset, &vcpu->arch.pkru,
4215 sizeof(vcpu->arch.pkru));
4216 else
4217 memcpy(dest + offset, src, size);
4218
4219 }
4220
4221 valid -= xfeature_mask;
4222 }
4223}
4224
4225static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
4226{
4227 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
4228 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
4229 u64 valid;
4230
4231
4232
4233
4234
4235 memcpy(xsave, src, XSAVE_HDR_OFFSET);
4236
4237
4238 xsave->header.xfeatures = xstate_bv;
4239 if (boot_cpu_has(X86_FEATURE_XSAVES))
4240 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
4241
4242
4243
4244
4245
4246 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4247 while (valid) {
4248 u64 xfeature_mask = valid & -valid;
4249 int xfeature_nr = fls64(xfeature_mask) - 1;
4250 void *dest = get_xsave_addr(xsave, xfeature_nr);
4251
4252 if (dest) {
4253 u32 size, offset, ecx, edx;
4254 cpuid_count(XSTATE_CPUID, xfeature_nr,
4255 &size, &offset, &ecx, &edx);
4256 if (xfeature_nr == XFEATURE_PKRU)
4257 memcpy(&vcpu->arch.pkru, src + offset,
4258 sizeof(vcpu->arch.pkru));
4259 else
4260 memcpy(dest, src + offset, size);
4261 }
4262
4263 valid -= xfeature_mask;
4264 }
4265}
4266
4267static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
4268 struct kvm_xsave *guest_xsave)
4269{
4270 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4271 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
4272 fill_xsave((u8 *) guest_xsave->region, vcpu);
4273 } else {
4274 memcpy(guest_xsave->region,
4275 &vcpu->arch.guest_fpu->state.fxsave,
4276 sizeof(struct fxregs_state));
4277 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
4278 XFEATURE_MASK_FPSSE;
4279 }
4280}
4281
4282#define XSAVE_MXCSR_OFFSET 24
4283
4284static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
4285 struct kvm_xsave *guest_xsave)
4286{
4287 u64 xstate_bv =
4288 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
4289 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
4290
4291 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4292 /*
4293 * Allow restoring any state the host supports (supported_xcr0),
4294 * even if it is not currently exposed to the guest via CPUID;
4295 * reserved MXCSR bits must still be clear.
4296 */
4297 if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
4298 return -EINVAL;
4299 load_xsave(vcpu, (u8 *)guest_xsave->region);
4300 } else {
4301 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
4302 mxcsr & ~mxcsr_feature_mask)
4303 return -EINVAL;
4304 memcpy(&vcpu->arch.guest_fpu->state.fxsave,
4305 guest_xsave->region, sizeof(struct fxregs_state));
4306 }
4307 return 0;
4308}
4309
4310static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
4311 struct kvm_xcrs *guest_xcrs)
4312{
4313 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
4314 guest_xcrs->nr_xcrs = 0;
4315 return;
4316 }
4317
4318 guest_xcrs->nr_xcrs = 1;
4319 guest_xcrs->flags = 0;
4320 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
4321 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
4322}
4323
4324static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
4325 struct kvm_xcrs *guest_xcrs)
4326{
4327 int i, r = 0;
4328
4329 if (!boot_cpu_has(X86_FEATURE_XSAVE))
4330 return -EINVAL;
4331
4332 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
4333 return -EINVAL;
4334
4335 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
4336
4337 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
4338 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
4339 guest_xcrs->xcrs[i].value);
4340 break;
4341 }
4342 if (r)
4343 r = -EINVAL;
4344 return r;
4345}
4346
4347/*
4348 * kvm_set_guest_paused() indicates to the guest kernel that it has been
4349 * stopped by the hypervisor.  This function will be called from the host
4350 * only.  It fails with -EINVAL if kvmclock is not enabled for this vCPU,
4351 * since PVCLOCK_GUEST_STOPPED is delivered through the pvclock flags
4352 * field.
4353 */
4353static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
4354{
4355 if (!vcpu->arch.pv_time_enabled)
4356 return -EINVAL;
4357 vcpu->arch.pvclock_set_guest_stopped_request = true;
4358 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4359 return 0;
4360}
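/*
 * Reached via the KVM_KVMCLOCK_CTRL vCPU ioctl, typically issued by a VMM
 * right before resuming a guest that was stopped for a while (sketch;
 * 'vcpu_fd' is a hypothetical open vCPU fd):
 *
 *	if (ioctl(vcpu_fd, KVM_KVMCLOCK_CTRL, 0) < 0 && errno == EINVAL)
 *		;	// this vCPU has not enabled kvmclock
 *
 * The PVCLOCK_GUEST_STOPPED flag set here lets the guest's soft-lockup
 * and other watchdogs know that the apparent stall was host-induced.
 */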
4361
4362static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4363 struct kvm_enable_cap *cap)
4364{
4365 int r;
4366 uint16_t vmcs_version;
4367 void __user *user_ptr;
4368
4369 if (cap->flags)
4370 return -EINVAL;
4371
4372 switch (cap->cap) {
4373 case KVM_CAP_HYPERV_SYNIC2:
4374 if (cap->args[0])
4375 return -EINVAL;
4376 fallthrough;
4377
4378 case KVM_CAP_HYPERV_SYNIC:
4379 if (!irqchip_in_kernel(vcpu->kvm))
4380 return -EINVAL;
4381 return kvm_hv_activate_synic(vcpu, cap->cap ==
4382 KVM_CAP_HYPERV_SYNIC2);
4383 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4384 if (!kvm_x86_ops.nested_ops->enable_evmcs)
4385 return -ENOTTY;
4386 r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version);
4387 if (!r) {
4388 user_ptr = (void __user *)(uintptr_t)cap->args[0];
4389 if (copy_to_user(user_ptr, &vmcs_version,
4390 sizeof(vmcs_version)))
4391 r = -EFAULT;
4392 }
4393 return r;
4394 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4395 if (!kvm_x86_ops.enable_direct_tlbflush)
4396 return -ENOTTY;
4397
4398 return kvm_x86_ops.enable_direct_tlbflush(vcpu);
4399
4400 default:
4401 return -EINVAL;
4402 }
4403}
4404
4405long kvm_arch_vcpu_ioctl(struct file *filp,
4406 unsigned int ioctl, unsigned long arg)
4407{
4408 struct kvm_vcpu *vcpu = filp->private_data;
4409 void __user *argp = (void __user *)arg;
4410 int r;
4411 union {
4412 struct kvm_lapic_state *lapic;
4413 struct kvm_xsave *xsave;
4414 struct kvm_xcrs *xcrs;
4415 void *buffer;
4416 } u;
4417
4418 vcpu_load(vcpu);
4419
4420 u.buffer = NULL;
4421 switch (ioctl) {
4422 case KVM_GET_LAPIC: {
4423 r = -EINVAL;
4424 if (!lapic_in_kernel(vcpu))
4425 goto out;
4426 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
4427 GFP_KERNEL_ACCOUNT);
4428
4429 r = -ENOMEM;
4430 if (!u.lapic)
4431 goto out;
4432 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
4433 if (r)
4434 goto out;
4435 r = -EFAULT;
4436 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
4437 goto out;
4438 r = 0;
4439 break;
4440 }
4441 case KVM_SET_LAPIC: {
4442 r = -EINVAL;
4443 if (!lapic_in_kernel(vcpu))
4444 goto out;
4445 u.lapic = memdup_user(argp, sizeof(*u.lapic));
4446 if (IS_ERR(u.lapic)) {
4447 r = PTR_ERR(u.lapic);
4448 goto out_nofree;
4449 }
4450
4451 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
4452 break;
4453 }
4454 case KVM_INTERRUPT: {
4455 struct kvm_interrupt irq;
4456
4457 r = -EFAULT;
4458 if (copy_from_user(&irq, argp, sizeof(irq)))
4459 goto out;
4460 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
4461 break;
4462 }
4463 case KVM_NMI: {
4464 r = kvm_vcpu_ioctl_nmi(vcpu);
4465 break;
4466 }
4467 case KVM_SMI: {
4468 r = kvm_vcpu_ioctl_smi(vcpu);
4469 break;
4470 }
4471 case KVM_SET_CPUID: {
4472 struct kvm_cpuid __user *cpuid_arg = argp;
4473 struct kvm_cpuid cpuid;
4474
4475 r = -EFAULT;
4476 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4477 goto out;
4478 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4479 break;
4480 }
4481 case KVM_SET_CPUID2: {
4482 struct kvm_cpuid2 __user *cpuid_arg = argp;
4483 struct kvm_cpuid2 cpuid;
4484
4485 r = -EFAULT;
4486 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4487 goto out;
4488 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
4489 cpuid_arg->entries);
4490 break;
4491 }
4492 case KVM_GET_CPUID2: {
4493 struct kvm_cpuid2 __user *cpuid_arg = argp;
4494 struct kvm_cpuid2 cpuid;
4495
4496 r = -EFAULT;
4497 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4498 goto out;
4499 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
4500 cpuid_arg->entries);
4501 if (r)
4502 goto out;
4503 r = -EFAULT;
4504 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4505 goto out;
4506 r = 0;
4507 break;
4508 }
4509 case KVM_GET_MSRS: {
4510 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4511 r = msr_io(vcpu, argp, do_get_msr, 1);
4512 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4513 break;
4514 }
4515 case KVM_SET_MSRS: {
4516 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4517 r = msr_io(vcpu, argp, do_set_msr, 0);
4518 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4519 break;
4520 }
4521 case KVM_TPR_ACCESS_REPORTING: {
4522 struct kvm_tpr_access_ctl tac;
4523
4524 r = -EFAULT;
4525 if (copy_from_user(&tac, argp, sizeof(tac)))
4526 goto out;
4527 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
4528 if (r)
4529 goto out;
4530 r = -EFAULT;
4531 if (copy_to_user(argp, &tac, sizeof(tac)))
4532 goto out;
4533 r = 0;
4534 break;
4535 };
4536 case KVM_SET_VAPIC_ADDR: {
4537 struct kvm_vapic_addr va;
4538 int idx;
4539
4540 r = -EINVAL;
4541 if (!lapic_in_kernel(vcpu))
4542 goto out;
4543 r = -EFAULT;
4544 if (copy_from_user(&va, argp, sizeof(va)))
4545 goto out;
4546 idx = srcu_read_lock(&vcpu->kvm->srcu);
4547 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
4548 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4549 break;
4550 }
4551 case KVM_X86_SETUP_MCE: {
4552 u64 mcg_cap;
4553
4554 r = -EFAULT;
4555 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
4556 goto out;
4557 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
4558 break;
4559 }
4560 case KVM_X86_SET_MCE: {
4561 struct kvm_x86_mce mce;
4562
4563 r = -EFAULT;
4564 if (copy_from_user(&mce, argp, sizeof(mce)))
4565 goto out;
4566 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
4567 break;
4568 }
4569 case KVM_GET_VCPU_EVENTS: {
4570 struct kvm_vcpu_events events;
4571
4572 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
4573
4574 r = -EFAULT;
4575 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
4576 break;
4577 r = 0;
4578 break;
4579 }
4580 case KVM_SET_VCPU_EVENTS: {
4581 struct kvm_vcpu_events events;
4582
4583 r = -EFAULT;
4584 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
4585 break;
4586
4587 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
4588 break;
4589 }
4590 case KVM_GET_DEBUGREGS: {
4591 struct kvm_debugregs dbgregs;
4592
4593 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
4594
4595 r = -EFAULT;
4596 if (copy_to_user(argp, &dbgregs,
4597 sizeof(struct kvm_debugregs)))
4598 break;
4599 r = 0;
4600 break;
4601 }
4602 case KVM_SET_DEBUGREGS: {
4603 struct kvm_debugregs dbgregs;
4604
4605 r = -EFAULT;
4606 if (copy_from_user(&dbgregs, argp,
4607 sizeof(struct kvm_debugregs)))
4608 break;
4609
4610 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
4611 break;
4612 }
4613 case KVM_GET_XSAVE: {
4614 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
4615 r = -ENOMEM;
4616 if (!u.xsave)
4617 break;
4618
4619 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
4620
4621 r = -EFAULT;
4622 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
4623 break;
4624 r = 0;
4625 break;
4626 }
4627 case KVM_SET_XSAVE: {
4628 u.xsave = memdup_user(argp, sizeof(*u.xsave));
4629 if (IS_ERR(u.xsave)) {
4630 r = PTR_ERR(u.xsave);
4631 goto out_nofree;
4632 }
4633
4634 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
4635 break;
4636 }
4637 case KVM_GET_XCRS: {
4638 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
4639 r = -ENOMEM;
4640 if (!u.xcrs)
4641 break;
4642
4643 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
4644
4645 r = -EFAULT;
4646 if (copy_to_user(argp, u.xcrs,
4647 sizeof(struct kvm_xcrs)))
4648 break;
4649 r = 0;
4650 break;
4651 }
4652 case KVM_SET_XCRS: {
4653 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
4654 if (IS_ERR(u.xcrs)) {
4655 r = PTR_ERR(u.xcrs);
4656 goto out_nofree;
4657 }
4658
4659 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
4660 break;
4661 }
4662 case KVM_SET_TSC_KHZ: {
4663 u32 user_tsc_khz;
4664
4665 r = -EINVAL;
4666 user_tsc_khz = (u32)arg;
4667
4668 if (kvm_has_tsc_control &&
4669 user_tsc_khz >= kvm_max_guest_tsc_khz)
4670 goto out;
4671
4672 if (user_tsc_khz == 0)
4673 user_tsc_khz = tsc_khz;
4674
4675 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
4676 r = 0;
4677
4678 goto out;
4679 }
4680 case KVM_GET_TSC_KHZ: {
4681 r = vcpu->arch.virtual_tsc_khz;
4682 goto out;
4683 }
4684 case KVM_KVMCLOCK_CTRL: {
4685 r = kvm_set_guest_paused(vcpu);
4686 goto out;
4687 }
4688 case KVM_ENABLE_CAP: {
4689 struct kvm_enable_cap cap;
4690
4691 r = -EFAULT;
4692 if (copy_from_user(&cap, argp, sizeof(cap)))
4693 goto out;
4694 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4695 break;
4696 }
4697 case KVM_GET_NESTED_STATE: {
4698 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4699 u32 user_data_size;
4700
4701 r = -EINVAL;
4702 if (!kvm_x86_ops.nested_ops->get_state)
4703 break;
4704
4705 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
4706 r = -EFAULT;
4707 if (get_user(user_data_size, &user_kvm_nested_state->size))
4708 break;
4709
4710 r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state,
4711 user_data_size);
4712 if (r < 0)
4713 break;
4714
4715 if (r > user_data_size) {
4716 if (put_user(r, &user_kvm_nested_state->size))
4717 r = -EFAULT;
4718 else
4719 r = -E2BIG;
4720 break;
4721 }
4722
4723 r = 0;
4724 break;
4725 }
4726 case KVM_SET_NESTED_STATE: {
4727 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4728 struct kvm_nested_state kvm_state;
4729 int idx;
4730
4731 r = -EINVAL;
4732 if (!kvm_x86_ops.nested_ops->set_state)
4733 break;
4734
4735 r = -EFAULT;
4736 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
4737 break;
4738
4739 r = -EINVAL;
4740 if (kvm_state.size < sizeof(kvm_state))
4741 break;
4742
4743 if (kvm_state.flags &
4744 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
4745 | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING
4746 | KVM_STATE_NESTED_GIF_SET))
4747 break;
4748
4749 /* KVM_STATE_NESTED_RUN_PENDING only makes sense while in guest mode. */
4750 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
4751 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
4752 break;
4753
4754 idx = srcu_read_lock(&vcpu->kvm->srcu);
4755 r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state);
4756 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4757 break;
4758 }
4759 case KVM_GET_SUPPORTED_HV_CPUID: {
4760 struct kvm_cpuid2 __user *cpuid_arg = argp;
4761 struct kvm_cpuid2 cpuid;
4762
4763 r = -EFAULT;
4764 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4765 goto out;
4766
4767 r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
4768 cpuid_arg->entries);
4769 if (r)
4770 goto out;
4771
4772 r = -EFAULT;
4773 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4774 goto out;
4775 r = 0;
4776 break;
4777 }
4778 default:
4779 r = -EINVAL;
4780 }
4781out:
4782 kfree(u.buffer);
4783out_nofree:
4784 vcpu_put(vcpu);
4785 return r;
4786}
4787
4788vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4789{
4790 return VM_FAULT_SIGBUS;
4791}
4792
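/*
 * KVM_SET_TSS_ADDR: the TSS used for real-mode emulation occupies three
 * consecutive pages, so reject any address whose three pages would not fit
 * below the top of the 32-bit address space.  Typical userspace usage
 * (illustrative value only):
 *
 *	ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000UL);
 */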
4793static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
4794{
4795 int ret;
4796
4797 if (addr > (unsigned int)(-3 * PAGE_SIZE))
4798 return -EINVAL;
4799 ret = kvm_x86_ops.set_tss_addr(kvm, addr);
4800 return ret;
4801}
4802
4803static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
4804 u64 ident_addr)
4805{
4806 return kvm_x86_ops.set_identity_map_addr(kvm, ident_addr);
4807}
4808
4809static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
4810 unsigned long kvm_nr_mmu_pages)
4811{
4812 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
4813 return -EINVAL;
4814
4815 mutex_lock(&kvm->slots_lock);
4816
4817 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
4818 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
4819
4820 mutex_unlock(&kvm->slots_lock);
4821 return 0;
4822}
4823
4824static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
4825{
4826 return kvm->arch.n_max_mmu_pages;
4827}
4828
4829static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4830{
4831 struct kvm_pic *pic = kvm->arch.vpic;
4832 int r;
4833
4834 r = 0;
4835 switch (chip->chip_id) {
4836 case KVM_IRQCHIP_PIC_MASTER:
4837 memcpy(&chip->chip.pic, &pic->pics[0],
4838 sizeof(struct kvm_pic_state));
4839 break;
4840 case KVM_IRQCHIP_PIC_SLAVE:
4841 memcpy(&chip->chip.pic, &pic->pics[1],
4842 sizeof(struct kvm_pic_state));
4843 break;
4844 case KVM_IRQCHIP_IOAPIC:
4845 kvm_get_ioapic(kvm, &chip->chip.ioapic);
4846 break;
4847 default:
4848 r = -EINVAL;
4849 break;
4850 }
4851 return r;
4852}
4853
4854static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4855{
4856 struct kvm_pic *pic = kvm->arch.vpic;
4857 int r;
4858
4859 r = 0;
4860 switch (chip->chip_id) {
4861 case KVM_IRQCHIP_PIC_MASTER:
4862 spin_lock(&pic->lock);
4863 memcpy(&pic->pics[0], &chip->chip.pic,
4864 sizeof(struct kvm_pic_state));
4865 spin_unlock(&pic->lock);
4866 break;
4867 case KVM_IRQCHIP_PIC_SLAVE:
4868 spin_lock(&pic->lock);
4869 memcpy(&pic->pics[1], &chip->chip.pic,
4870 sizeof(struct kvm_pic_state));
4871 spin_unlock(&pic->lock);
4872 break;
4873 case KVM_IRQCHIP_IOAPIC:
4874 kvm_set_ioapic(kvm, &chip->chip.ioapic);
4875 break;
4876 default:
4877 r = -EINVAL;
4878 break;
4879 }
4880 kvm_pic_update_irq(pic);
4881 return r;
4882}
4883
4884static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4885{
4886 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
4887
4888 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
4889
4890 mutex_lock(&kps->lock);
4891 memcpy(ps, &kps->channels, sizeof(*ps));
4892 mutex_unlock(&kps->lock);
4893 return 0;
4894}
4895
4896static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4897{
4898 int i;
4899 struct kvm_pit *pit = kvm->arch.vpit;
4900
4901 mutex_lock(&pit->pit_state.lock);
4902 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
4903 for (i = 0; i < 3; i++)
4904 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
4905 mutex_unlock(&pit->pit_state.lock);
4906 return 0;
4907}
4908
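/*
 * KVM_GET_PIT2/KVM_SET_PIT2 carry the HPET-legacy flag in addition to the
 * raw channel state.  On a 0 -> 1 transition of KVM_PIT_FLAGS_HPET_LEGACY,
 * only channel 0 is reloaded in HPET-legacy mode (the 'start && i == 0'
 * argument below); the other channels are reloaded normally.
 */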
4909static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4910{
4911 mutex_lock(&kvm->arch.vpit->pit_state.lock);
4912 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
4913 sizeof(ps->channels));
4914 ps->flags = kvm->arch.vpit->pit_state.flags;
4915 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
4916 memset(&ps->reserved, 0, sizeof(ps->reserved));
4917 return 0;
4918}
4919
4920static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4921{
4922 int start = 0;
4923 int i;
4924 u32 prev_legacy, cur_legacy;
4925 struct kvm_pit *pit = kvm->arch.vpit;
4926
4927 mutex_lock(&pit->pit_state.lock);
4928 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
4929 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
4930 if (!prev_legacy && cur_legacy)
4931 start = 1;
4932 memcpy(&pit->pit_state.channels, &ps->channels,
4933 sizeof(pit->pit_state.channels));
4934 pit->pit_state.flags = ps->flags;
4935 for (i = 0; i < 3; i++)
4936 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
4937 start && i == 0);
4938 mutex_unlock(&pit->pit_state.lock);
4939 return 0;
4940}
4941
4942static int kvm_vm_ioctl_reinject(struct kvm *kvm,
4943 struct kvm_reinject_control *control)
4944{
4945 struct kvm_pit *pit = kvm->arch.vpit;
4946
4947 /* pit->pit_state.lock is taken so userspace cannot observe an
4948  * inconsistent state if several KVM_REINJECT_CONTROL ioctls run in
4949  * parallel.  Use a separate lock if this ioctl ever stops being rare.
4950  */
4951 mutex_lock(&pit->pit_state.lock);
4952 kvm_pit_set_reinject(pit, control->pit_reinject);
4953 mutex_unlock(&pit->pit_state.lock);
4954
4955 return 0;
4956}
4957
4958void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
4959{
4960 /*
4961  * Flush potentially hardware-cached dirty pages to dirty_bitmap.
4962  */
4963 if (kvm_x86_ops.flush_log_dirty)
4964 kvm_x86_ops.flush_log_dirty(kvm);
4965}
4966
4967int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
4968 bool line_status)
4969{
4970 if (!irqchip_in_kernel(kvm))
4971 return -ENXIO;
4972
4973 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
4974 irq_event->irq, irq_event->level,
4975 line_status);
4976 return 0;
4977}
4978
4979int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
4980 struct kvm_enable_cap *cap)
4981{
4982 int r;
4983
4984 if (cap->flags)
4985 return -EINVAL;
4986
4987 switch (cap->cap) {
4988 case KVM_CAP_DISABLE_QUIRKS:
4989 kvm->arch.disabled_quirks = cap->args[0];
4990 r = 0;
4991 break;
4992 case KVM_CAP_SPLIT_IRQCHIP: {
4993 mutex_lock(&kvm->lock);
4994 r = -EINVAL;
4995 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
4996 goto split_irqchip_unlock;
4997 r = -EEXIST;
4998 if (irqchip_in_kernel(kvm))
4999 goto split_irqchip_unlock;
5000 if (kvm->created_vcpus)
5001 goto split_irqchip_unlock;
5002 r = kvm_setup_empty_irq_routing(kvm);
5003 if (r)
5004 goto split_irqchip_unlock;
5005 /* Pairs with irqchip_in_kernel(). */
5006 smp_wmb();
5007 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
5008 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
5009 r = 0;
5010split_irqchip_unlock:
5011 mutex_unlock(&kvm->lock);
5012 break;
5013 }
5014 case KVM_CAP_X2APIC_API:
5015 r = -EINVAL;
5016 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
5017 break;
5018
5019 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
5020 kvm->arch.x2apic_format = true;
5021 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
5022 kvm->arch.x2apic_broadcast_quirk_disabled = true;
5023
5024 r = 0;
5025 break;
5026 case KVM_CAP_X86_DISABLE_EXITS:
5027 r = -EINVAL;
5028 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
5029 break;
5030
5031 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
5032 kvm_can_mwait_in_guest())
5033 kvm->arch.mwait_in_guest = true;
5034 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
5035 kvm->arch.hlt_in_guest = true;
5036 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
5037 kvm->arch.pause_in_guest = true;
5038 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
5039 kvm->arch.cstate_in_guest = true;
5040 r = 0;
5041 break;
5042 case KVM_CAP_MSR_PLATFORM_INFO:
5043 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
5044 r = 0;
5045 break;
5046 case KVM_CAP_EXCEPTION_PAYLOAD:
5047 kvm->arch.exception_payload_enabled = cap->args[0];
5048 r = 0;
5049 break;
5050 default:
5051 r = -EINVAL;
5052 break;
5053 }
5054 return r;
5055}
5056
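/*
 * VM-scoped ioctl dispatcher.  'r' starts out as -ENOTTY so unhandled
 * commands report "not supported" rather than a generic error, and the
 * on-stack PIT payloads share a union since at most one is live per call.
 * Illustrative userspace call (sketch, values are examples only):
 *
 *	struct kvm_clock_data data = { .clock = 0 };
 *	ioctl(vm_fd, KVM_SET_CLOCK, &data);
 */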
5057long kvm_arch_vm_ioctl(struct file *filp,
5058 unsigned int ioctl, unsigned long arg)
5059{
5060 struct kvm *kvm = filp->private_data;
5061 void __user *argp = (void __user *)arg;
5062 int r = -ENOTTY;
5063
5064 /*
5065  * Only one of these payloads is used per call, so a union keeps the
5066  * stack footprint to the largest of them rather than their sum.
5067  */
5068 union {
5069 struct kvm_pit_state ps;
5070 struct kvm_pit_state2 ps2;
5071 struct kvm_pit_config pit_config;
5072 } u;
5073
5074 switch (ioctl) {
5075 case KVM_SET_TSS_ADDR:
5076 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
5077 break;
5078 case KVM_SET_IDENTITY_MAP_ADDR: {
5079 u64 ident_addr;
5080
5081 mutex_lock(&kvm->lock);
5082 r = -EINVAL;
5083 if (kvm->created_vcpus)
5084 goto set_identity_unlock;
5085 r = -EFAULT;
5086 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
5087 goto set_identity_unlock;
5088 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
5089set_identity_unlock:
5090 mutex_unlock(&kvm->lock);
5091 break;
5092 }
5093 case KVM_SET_NR_MMU_PAGES:
5094 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
5095 break;
5096 case KVM_GET_NR_MMU_PAGES:
5097 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
5098 break;
5099 case KVM_CREATE_IRQCHIP: {
5100 mutex_lock(&kvm->lock);
5101
5102 r = -EEXIST;
5103 if (irqchip_in_kernel(kvm))
5104 goto create_irqchip_unlock;
5105
5106 r = -EINVAL;
5107 if (kvm->created_vcpus)
5108 goto create_irqchip_unlock;
5109
5110 r = kvm_pic_init(kvm);
5111 if (r)
5112 goto create_irqchip_unlock;
5113
5114 r = kvm_ioapic_init(kvm);
5115 if (r) {
5116 kvm_pic_destroy(kvm);
5117 goto create_irqchip_unlock;
5118 }
5119
5120 r = kvm_setup_default_irq_routing(kvm);
5121 if (r) {
5122 kvm_ioapic_destroy(kvm);
5123 kvm_pic_destroy(kvm);
5124 goto create_irqchip_unlock;
5125 }
5126 /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
5127 smp_wmb();
5128 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
5129 create_irqchip_unlock:
5130 mutex_unlock(&kvm->lock);
5131 break;
5132 }
5133 case KVM_CREATE_PIT:
5134 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
5135 goto create_pit;
5136 case KVM_CREATE_PIT2:
5137 r = -EFAULT;
5138 if (copy_from_user(&u.pit_config, argp,
5139 sizeof(struct kvm_pit_config)))
5140 goto out;
5141 create_pit:
5142 mutex_lock(&kvm->lock);
5143 r = -EEXIST;
5144 if (kvm->arch.vpit)
5145 goto create_pit_unlock;
5146 r = -ENOMEM;
5147 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
5148 if (kvm->arch.vpit)
5149 r = 0;
5150 create_pit_unlock:
5151 mutex_unlock(&kvm->lock);
5152 break;
5153 case KVM_GET_IRQCHIP: {
5154 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
5155 struct kvm_irqchip *chip;
5156
5157 chip = memdup_user(argp, sizeof(*chip));
5158 if (IS_ERR(chip)) {
5159 r = PTR_ERR(chip);
5160 goto out;
5161 }
5162
5163 r = -ENXIO;
5164 if (!irqchip_kernel(kvm))
5165 goto get_irqchip_out;
5166 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
5167 if (r)
5168 goto get_irqchip_out;
5169 r = -EFAULT;
5170 if (copy_to_user(argp, chip, sizeof(*chip)))
5171 goto get_irqchip_out;
5172 r = 0;
5173 get_irqchip_out:
5174 kfree(chip);
5175 break;
5176 }
5177 case KVM_SET_IRQCHIP: {
5178 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
5179 struct kvm_irqchip *chip;
5180
5181 chip = memdup_user(argp, sizeof(*chip));
5182 if (IS_ERR(chip)) {
5183 r = PTR_ERR(chip);
5184 goto out;
5185 }
5186
5187 r = -ENXIO;
5188 if (!irqchip_kernel(kvm))
5189 goto set_irqchip_out;
5190 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
5191 set_irqchip_out:
5192 kfree(chip);
5193 break;
5194 }
5195 case KVM_GET_PIT: {
5196 r = -EFAULT;
5197 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
5198 goto out;
5199 r = -ENXIO;
5200 if (!kvm->arch.vpit)
5201 goto out;
5202 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
5203 if (r)
5204 goto out;
5205 r = -EFAULT;
5206 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
5207 goto out;
5208 r = 0;
5209 break;
5210 }
5211 case KVM_SET_PIT: {
5212 r = -EFAULT;
5213 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
5214 goto out;
5215 mutex_lock(&kvm->lock);
5216 r = -ENXIO;
5217 if (!kvm->arch.vpit)
5218 goto set_pit_out;
5219 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
5220set_pit_out:
5221 mutex_unlock(&kvm->lock);
5222 break;
5223 }
5224 case KVM_GET_PIT2: {
5225 r = -ENXIO;
5226 if (!kvm->arch.vpit)
5227 goto out;
5228 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
5229 if (r)
5230 goto out;
5231 r = -EFAULT;
5232 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
5233 goto out;
5234 r = 0;
5235 break;
5236 }
5237 case KVM_SET_PIT2: {
5238 r = -EFAULT;
5239 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
5240 goto out;
5241 mutex_lock(&kvm->lock);
5242 r = -ENXIO;
5243 if (!kvm->arch.vpit)
5244 goto set_pit2_out;
5245 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
5246set_pit2_out:
5247 mutex_unlock(&kvm->lock);
5248 break;
5249 }
5250 case KVM_REINJECT_CONTROL: {
5251 struct kvm_reinject_control control;
5252 r = -EFAULT;
5253 if (copy_from_user(&control, argp, sizeof(control)))
5254 goto out;
5255 r = -ENXIO;
5256 if (!kvm->arch.vpit)
5257 goto out;
5258 r = kvm_vm_ioctl_reinject(kvm, &control);
5259 break;
5260 }
5261 case KVM_SET_BOOT_CPU_ID:
5262 r = 0;
5263 mutex_lock(&kvm->lock);
5264 if (kvm->created_vcpus)
5265 r = -EBUSY;
5266 else
5267 kvm->arch.bsp_vcpu_id = arg;
5268 mutex_unlock(&kvm->lock);
5269 break;
5270 case KVM_XEN_HVM_CONFIG: {
5271 struct kvm_xen_hvm_config xhc;
5272 r = -EFAULT;
5273 if (copy_from_user(&xhc, argp, sizeof(xhc)))
5274 goto out;
5275 r = -EINVAL;
5276 if (xhc.flags)
5277 goto out;
5278 memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
5279 r = 0;
5280 break;
5281 }
5282 case KVM_SET_CLOCK: {
5283 struct kvm_clock_data user_ns;
5284 u64 now_ns;
5285
5286 r = -EFAULT;
5287 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
5288 goto out;
5289
5290 r = -EINVAL;
5291 if (user_ns.flags)
5292 goto out;
5293
5294 r = 0;
5295 /*
5296  * Refresh the masterclock so get_kvmclock_ns() is current, then
5297  * fold the requested value into kvmclock_offset and make every
5298  * vCPU recompute its kvmclock.
5299  */
5300 kvm_gen_update_masterclock(kvm);
5301 now_ns = get_kvmclock_ns(kvm);
5302 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
5303 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
5304 break;
5305 }
5306 case KVM_GET_CLOCK: {
5307 struct kvm_clock_data user_ns;
5308 u64 now_ns;
5309
5310 now_ns = get_kvmclock_ns(kvm);
5311 user_ns.clock = now_ns;
5312 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
5313 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
5314
5315 r = -EFAULT;
5316 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
5317 goto out;
5318 r = 0;
5319 break;
5320 }
5321 case KVM_MEMORY_ENCRYPT_OP: {
5322 r = -ENOTTY;
5323 if (kvm_x86_ops.mem_enc_op)
5324 r = kvm_x86_ops.mem_enc_op(kvm, argp);
5325 break;
5326 }
5327 case KVM_MEMORY_ENCRYPT_REG_REGION: {
5328 struct kvm_enc_region region;
5329
5330 r = -EFAULT;
5331 if (copy_from_user(&region, argp, sizeof(region)))
5332 goto out;
5333
5334 r = -ENOTTY;
5335 if (kvm_x86_ops.mem_enc_reg_region)
5336 r = kvm_x86_ops.mem_enc_reg_region(kvm, &region);
5337 break;
5338 }
5339 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
5340 struct kvm_enc_region region;
5341
5342 r = -EFAULT;
5343 if (copy_from_user(&region, argp, sizeof(region)))
5344 goto out;
5345
5346 r = -ENOTTY;
5347 if (kvm_x86_ops.mem_enc_unreg_region)
5348 r = kvm_x86_ops.mem_enc_unreg_region(kvm, &region);
5349 break;
5350 }
5351 case KVM_HYPERV_EVENTFD: {
5352 struct kvm_hyperv_eventfd hvevfd;
5353
5354 r = -EFAULT;
5355 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
5356 goto out;
5357 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
5358 break;
5359 }
5360 case KVM_SET_PMU_EVENT_FILTER:
5361 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
5362 break;
5363 default:
5364 r = -ENOTTY;
5365 }
5366out:
5367 return r;
5368}
5369
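/*
 * Build the MSR lists reported to userspace: msrs_to_save (host-backed MSRs
 * worth migrating), emulated_msrs (MSRs KVM emulates itself), and
 * msr_based_features (feature-enumeration MSRs).  Host support is probed
 * with rdmsr_safe() plus the per-MSR feature checks below.
 */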
5370static void kvm_init_msr_list(void)
5371{
5372 struct x86_pmu_capability x86_pmu;
5373 u32 dummy[2];
5374 unsigned i;
5375
5376 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
5377 "Please update the fixed PMCs in msrs_to_saved_all[]");
5378
5379 perf_get_x86_pmu_capability(&x86_pmu);
5380
5381 num_msrs_to_save = 0;
5382 num_emulated_msrs = 0;
5383 num_msr_based_features = 0;
5384
5385 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
5386 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
5387 continue;
5388 /*
5389  * Even MSRs that are valid in the host may not be exposed to
5390  * the guest in some cases; filter out those that depend on
5391  * features KVM does not advertise.
5392  */
5393 switch (msrs_to_save_all[i]) {
5394 case MSR_IA32_BNDCFGS:
5395 if (!kvm_mpx_supported())
5396 continue;
5397 break;
5398 case MSR_TSC_AUX:
5399 if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
5400 continue;
5401 break;
5402 case MSR_IA32_UMWAIT_CONTROL:
5403 if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
5404 continue;
5405 break;
5406 case MSR_IA32_RTIT_CTL:
5407 case MSR_IA32_RTIT_STATUS:
5408 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
5409 continue;
5410 break;
5411 case MSR_IA32_RTIT_CR3_MATCH:
5412 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
5413 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
5414 continue;
5415 break;
5416 case MSR_IA32_RTIT_OUTPUT_BASE:
5417 case MSR_IA32_RTIT_OUTPUT_MASK:
5418 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
5419 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
5420 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
5421 continue;
5422 break;
5423 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
5424 if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) ||
5425 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
5426 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
5427 continue;
5428 break;
5429 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
5430 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
5431 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5432 continue;
5433 break;
5434 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
5435 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
5436 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5437 continue;
5438 break;
5439 default:
5440 break;
5441 }
5442
5443 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
5444 }
5445
5446 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
5447 if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
5448 continue;
5449
5450 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
5451 }
5452
5453 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
5454 struct kvm_msr_entry msr;
5455
5456 msr.index = msr_based_features_all[i];
5457 if (kvm_get_msr_feature(&msr))
5458 continue;
5459
5460 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
5461 }
5462}
5463
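/*
 * MMIO helpers: accesses are split into chunks of at most 8 bytes.  The
 * in-kernel local APIC gets first crack at each chunk, then the KVM_MMIO_BUS
 * devices; the return value is the number of bytes handled in the kernel so
 * the caller can hand any remainder to userspace.
 */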
5464static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
5465 const void *v)
5466{
5467 int handled = 0;
5468 int n;
5469
5470 do {
5471 n = min(len, 8);
5472 if (!(lapic_in_kernel(vcpu) &&
5473 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
5474 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
5475 break;
5476 handled += n;
5477 addr += n;
5478 len -= n;
5479 v += n;
5480 } while (len);
5481
5482 return handled;
5483}
5484
5485static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
5486{
5487 int handled = 0;
5488 int n;
5489
5490 do {
5491 n = min(len, 8);
5492 if (!(lapic_in_kernel(vcpu) &&
5493 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
5494 addr, n, v))
5495 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
5496 break;
5497 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
5498 handled += n;
5499 addr += n;
5500 len -= n;
5501 v += n;
5502 } while (len);
5503
5504 return handled;
5505}
5506
5507static void kvm_set_segment(struct kvm_vcpu *vcpu,
5508 struct kvm_segment *var, int seg)
5509{
5510 kvm_x86_ops.set_segment(vcpu, var, seg);
5511}
5512
5513void kvm_get_segment(struct kvm_vcpu *vcpu,
5514 struct kvm_segment *var, int seg)
5515{
5516 kvm_x86_ops.get_segment(vcpu, var, seg);
5517}
5518
5519gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
5520 struct x86_exception *exception)
5521{
5522 gpa_t t_gpa;
5523
5524 BUG_ON(!mmu_is_nested(vcpu));
5525
5526 /* NPT walks are always treated as user-mode accesses. */
5527 access |= PFERR_USER_MASK;
5528 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
5529
5530 return t_gpa;
5531}
5532
5533gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
5534 struct x86_exception *exception)
5535{
5536 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5537 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5538}
5539
5540gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
5541 struct x86_exception *exception)
5542{
5543 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5544 access |= PFERR_FETCH_MASK;
5545 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5546}
5547
5548gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
5549 struct x86_exception *exception)
5550{
5551 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5552 access |= PFERR_WRITE_MASK;
5553 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5554}
5555
5556/* uses this to access any guest's mapped memory without checking CPL */
5557gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
5558 struct x86_exception *exception)
5559{
5560 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
5561}
5562
5563static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5564 struct kvm_vcpu *vcpu, u32 access,
5565 struct x86_exception *exception)
5566{
5567 void *data = val;
5568 int r = X86EMUL_CONTINUE;
5569
5570 while (bytes) {
5571 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
5572 exception);
5573 unsigned offset = addr & (PAGE_SIZE-1);
5574 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
5575 int ret;
5576
5577 if (gpa == UNMAPPED_GVA)
5578 return X86EMUL_PROPAGATE_FAULT;
5579 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
5580 offset, toread);
5581 if (ret < 0) {
5582 r = X86EMUL_IO_NEEDED;
5583 goto out;
5584 }
5585
5586 bytes -= toread;
5587 data += toread;
5588 addr += toread;
5589 }
5590out:
5591 return r;
5592}
5593
5594/* used for instruction fetching */
5595static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
5596 gva_t addr, void *val, unsigned int bytes,
5597 struct x86_exception *exception)
5598{
5599 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5600 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5601 unsigned offset;
5602 int ret;
5603
5604 /* Inline kvm_read_guest_virt_helper for speed. */
5605 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
5606 exception);
5607 if (unlikely(gpa == UNMAPPED_GVA))
5608 return X86EMUL_PROPAGATE_FAULT;
5609
5610 offset = addr & (PAGE_SIZE-1);
5611 if (WARN_ON(offset + bytes > PAGE_SIZE))
5612 bytes = (unsigned)PAGE_SIZE - offset;
5613 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
5614 offset, bytes);
5615 if (unlikely(ret < 0))
5616 return X86EMUL_IO_NEEDED;
5617
5618 return X86EMUL_CONTINUE;
5619}
5620
5621int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
5622 gva_t addr, void *val, unsigned int bytes,
5623 struct x86_exception *exception)
5624{
5625 u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5626
5627 /*
5628  * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
5629  * is returned, but our callers are not ready for that and they blindly
5630  * call kvm_inject_page_fault.  Ensure that they at least do not leak
5631  * kernel stack memory into guest's memory.
5632  */
5633 memset(exception, 0, sizeof(*exception));
5634 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
5635 exception);
5636}
5637EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
5638
5639static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
5640 gva_t addr, void *val, unsigned int bytes,
5641 struct x86_exception *exception, bool system)
5642{
5643 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5644 u32 access = 0;
5645
5646 if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
5647 access |= PFERR_USER_MASK;
5648
5649 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
5650}
5651
5652static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
5653 unsigned long addr, void *val, unsigned int bytes)
5654{
5655 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5656 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
5657
5658 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
5659}
5660
5661static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5662 struct kvm_vcpu *vcpu, u32 access,
5663 struct x86_exception *exception)
5664{
5665 void *data = val;
5666 int r = X86EMUL_CONTINUE;
5667
5668 while (bytes) {
5669 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
5670 access,
5671 exception);
5672 unsigned offset = addr & (PAGE_SIZE-1);
5673 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
5674 int ret;
5675
5676 if (gpa == UNMAPPED_GVA)
5677 return X86EMUL_PROPAGATE_FAULT;
5678 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
5679 if (ret < 0) {
5680 r = X86EMUL_IO_NEEDED;
5681 goto out;
5682 }
5683
5684 bytes -= towrite;
5685 data += towrite;
5686 addr += towrite;
5687 }
5688out:
5689 return r;
5690}
5691
5692static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
5693 unsigned int bytes, struct x86_exception *exception,
5694 bool system)
5695{
5696 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5697 u32 access = PFERR_WRITE_MASK;
5698
5699 if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
5700 access |= PFERR_USER_MASK;
5701
5702 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5703 access, exception);
5704}
5705
5706int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
5707 unsigned int bytes, struct x86_exception *exception)
5708{
5709 /* kvm_write_guest_virt_system can pull in tons of pages. */
5710 vcpu->arch.l1tf_flush_l1d = true;
5711
5712 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5713 PFERR_WRITE_MASK, exception);
5714}
5715EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
5716
5717int handle_ud(struct kvm_vcpu *vcpu)
5718{
5719 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
5720 int emul_type = EMULTYPE_TRAP_UD;
5721 char sig[5];
5722 struct x86_exception e;
5723
5724 if (force_emulation_prefix &&
5725 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
5726 sig, sizeof(sig), &e) == 0 &&
5727 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
5728 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
5729 emul_type = EMULTYPE_TRAP_UD_FORCED;
5730 }
5731
5732 return kvm_emulate_instruction(vcpu, emul_type);
5733}
5734EXPORT_SYMBOL_GPL(handle_ud);
5735
5736static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5737 gpa_t gpa, bool write)
5738{
5739 /* For APIC access vmexit */
5740 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5741 return 1;
5742
5743 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
5744 trace_vcpu_match_mmio(gva, gpa, write, true);
5745 return 1;
5746 }
5747
5748 return 0;
5749}
5750
5751static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5752 gpa_t *gpa, struct x86_exception *exception,
5753 bool write)
5754{
5755 u32 access = ((kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
5756 | (write ? PFERR_WRITE_MASK : 0);
5757
5758 /*
5759  * Currently PKRU is only applied to EPT-enabled guests, so there is
5760  * no protection key in the EPT page tables for an L1 guest or the
5761  * EPT shadow page tables for an L2 guest; pkey 0 is always correct.
5762  */
5763 if (vcpu_match_mmio_gva(vcpu, gva)
5764 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
5765 vcpu->arch.mmio_access, 0, access)) {
5766 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
5767 (gva & (PAGE_SIZE - 1));
5768 trace_vcpu_match_mmio(gva, *gpa, write, false);
5769 return 1;
5770 }
5771
5772 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5773
5774 if (*gpa == UNMAPPED_GVA)
5775 return -1;
5776
5777 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
5778}
5779
5780int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
5781 const void *val, int bytes)
5782{
5783 int ret;
5784
5785 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
5786 if (ret < 0)
5787 return 0;
5788 kvm_page_track_write(vcpu, gpa, val, bytes);
5789 return 1;
5790}
5791
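/*
 * read_write_emulator_ops abstracts the difference between emulated reads
 * and writes: an optional prepare step (used to complete a previous MMIO
 * read), an attempt against ordinary guest memory, an attempt against
 * in-kernel MMIO devices, and finally the code that fills vcpu->run for a
 * KVM_EXIT_MMIO exit to userspace.
 */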
5792struct read_write_emulator_ops {
5793 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
5794 int bytes);
5795 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
5796 void *val, int bytes);
5797 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5798 int bytes, void *val);
5799 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5800 void *val, int bytes);
5801 bool write;
5802};
5803
5804static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
5805{
5806 if (vcpu->mmio_read_completed) {
5807 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
5808 vcpu->mmio_fragments[0].gpa, val);
5809 vcpu->mmio_read_completed = 0;
5810 return 1;
5811 }
5812
5813 return 0;
5814}
5815
5816static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5817 void *val, int bytes)
5818{
5819 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
5820}
5821
5822static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5823 void *val, int bytes)
5824{
5825 return emulator_write_phys(vcpu, gpa, val, bytes);
5826}
5827
5828static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
5829{
5830 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
5831 return vcpu_mmio_write(vcpu, gpa, bytes, val);
5832}
5833
5834static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5835 void *val, int bytes)
5836{
5837 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
5838 return X86EMUL_IO_NEEDED;
5839}
5840
5841static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5842 void *val, int bytes)
5843{
5844 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
5845
5846 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
5847 return X86EMUL_CONTINUE;
5848}
5849
5850static const struct read_write_emulator_ops read_emultor = {
5851 .read_write_prepare = read_prepare,
5852 .read_write_emulate = read_emulate,
5853 .read_write_mmio = vcpu_mmio_read,
5854 .read_write_exit_mmio = read_exit_mmio,
5855};
5856
5857static const struct read_write_emulator_ops write_emultor = {
5858 .read_write_emulate = write_emulate,
5859 .read_write_mmio = write_mmio,
5860 .read_write_exit_mmio = write_exit_mmio,
5861 .write = true,
5862};
5863
5864static int emulator_read_write_onepage(unsigned long addr, void *val,
5865 unsigned int bytes,
5866 struct x86_exception *exception,
5867 struct kvm_vcpu *vcpu,
5868 const struct read_write_emulator_ops *ops)
5869{
5870 gpa_t gpa;
5871 int handled, ret;
5872 bool write = ops->write;
5873 struct kvm_mmio_fragment *frag;
5874 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
5875
5876 /*
5877  * If the exit was due to a NPF we may already have a GPA.
5878  * If the GPA is present, use it to avoid the GVA to GPA table walk.
5879  * Note, this cannot be used on string operations since a string
5880  * operation using rep will only have the initial GPA from the NPF
5881  * that occurred.
5882  */
5883 if (ctxt->gpa_available && emulator_can_use_gpa(ctxt) &&
5884 (addr & ~PAGE_MASK) == (ctxt->gpa_val & ~PAGE_MASK)) {
5885 gpa = ctxt->gpa_val;
5886 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
5887 } else {
5888 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
5889 if (ret < 0)
5890 return X86EMUL_PROPAGATE_FAULT;
5891 }
5892
5893 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
5894 return X86EMUL_CONTINUE;
5895
5896 /*
5897  * Is this MMIO handled locally?
5898  */
5899 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
5900 if (handled == bytes)
5901 return X86EMUL_CONTINUE;
5902
5903 gpa += handled;
5904 bytes -= handled;
5905 val += handled;
5906
5907 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
5908 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
5909 frag->gpa = gpa;
5910 frag->data = val;
5911 frag->len = bytes;
5912 return X86EMUL_CONTINUE;
5913}
5914
5915static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
5916 unsigned long addr,
5917 void *val, unsigned int bytes,
5918 struct x86_exception *exception,
5919 const struct read_write_emulator_ops *ops)
5920{
5921 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5922 gpa_t gpa;
5923 int rc;
5924
5925 if (ops->read_write_prepare &&
5926 ops->read_write_prepare(vcpu, val, bytes))
5927 return X86EMUL_CONTINUE;
5928
5929 vcpu->mmio_nr_fragments = 0;
5930
5931 /* Crossing a page boundary? */
5932 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
5933 int now;
5934
5935 now = -addr & ~PAGE_MASK;
5936 rc = emulator_read_write_onepage(addr, val, now, exception,
5937 vcpu, ops);
5938
5939 if (rc != X86EMUL_CONTINUE)
5940 return rc;
5941 addr += now;
5942 if (ctxt->mode != X86EMUL_MODE_PROT64)
5943 addr = (u32)addr;
5944 val += now;
5945 bytes -= now;
5946 }
5947
5948 rc = emulator_read_write_onepage(addr, val, bytes, exception,
5949 vcpu, ops);
5950 if (rc != X86EMUL_CONTINUE)
5951 return rc;
5952
5953 if (!vcpu->mmio_nr_fragments)
5954 return rc;
5955
5956 gpa = vcpu->mmio_fragments[0].gpa;
5957
5958 vcpu->mmio_needed = 1;
5959 vcpu->mmio_cur_fragment = 0;
5960
5961 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
5962 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
5963 vcpu->run->exit_reason = KVM_EXIT_MMIO;
5964 vcpu->run->mmio.phys_addr = gpa;
5965
5966 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
5967}
5968
5969static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
5970 unsigned long addr,
5971 void *val,
5972 unsigned int bytes,
5973 struct x86_exception *exception)
5974{
5975 return emulator_read_write(ctxt, addr, val, bytes,
5976 exception, &read_emultor);
5977}
5978
5979static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
5980 unsigned long addr,
5981 const void *val,
5982 unsigned int bytes,
5983 struct x86_exception *exception)
5984{
5985 return emulator_read_write(ctxt, addr, (void *)val, bytes,
5986 exception, &write_emultor);
5987}
5988
5989#define CMPXCHG_TYPE(t, ptr, old, new) \
5990 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
5991
5992#ifdef CONFIG_X86_64
5993# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
5994#else
5995# define CMPXCHG64(ptr, old, new) \
5996 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
5997#endif
5998
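/*
 * Emulated CMPXCHG: map the guest page and perform a real host cmpxchg so
 * other vCPUs observe an atomic update.  If the operand size is unsupported,
 * the address is unmapped or the APIC page, or the access would split a
 * cache line with split-lock detection enabled, fall back to emulating the
 * exchange as a plain write.
 */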
5999static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
6000 unsigned long addr,
6001 const void *old,
6002 const void *new,
6003 unsigned int bytes,
6004 struct x86_exception *exception)
6005{
6006 struct kvm_host_map map;
6007 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6008 u64 page_line_mask;
6009 gpa_t gpa;
6010 char *kaddr;
6011 bool exchanged;
6012
6013 /* guest cmpxchg8b has to be emulated atomically */
6014 if (bytes > 8 || (bytes & (bytes - 1)))
6015 goto emul_write;
6016
6017 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
6018
6019 if (gpa == UNMAPPED_GVA ||
6020 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
6021 goto emul_write;
6022
6023 /*
6024  * Emulate the atomic as a straight write to avoid #AC if split-lock
6025  * detection is enabled in the host and the access splits a cache line.
6026  */
6027 if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
6028 page_line_mask = ~(cache_line_size() - 1);
6029 else
6030 page_line_mask = PAGE_MASK;
6031
6032 if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
6033 goto emul_write;
6034
6035 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
6036 goto emul_write;
6037
6038 kaddr = map.hva + offset_in_page(gpa);
6039
6040 switch (bytes) {
6041 case 1:
6042 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
6043 break;
6044 case 2:
6045 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
6046 break;
6047 case 4:
6048 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
6049 break;
6050 case 8:
6051 exchanged = CMPXCHG64(kaddr, old, new);
6052 break;
6053 default:
6054 BUG();
6055 }
6056
6057 kvm_vcpu_unmap(vcpu, &map, true);
6058
6059 if (!exchanged)
6060 return X86EMUL_CMPXCHG_FAILED;
6061
6062 kvm_page_track_write(vcpu, gpa, new, bytes);
6063
6064 return X86EMUL_CONTINUE;
6065
6066emul_write:
6067 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
6068
6069 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
6070}
6071
6072static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
6073{
6074 int r = 0, i;
6075
6076 for (i = 0; i < vcpu->arch.pio.count; i++) {
6077 if (vcpu->arch.pio.in)
6078 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
6079 vcpu->arch.pio.size, pd);
6080 else
6081 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
6082 vcpu->arch.pio.port, vcpu->arch.pio.size,
6083 pd);
6084 if (r)
6085 break;
6086 pd += vcpu->arch.pio.size;
6087 }
6088 return r;
6089}
6090
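/*
 * Port I/O from the emulator: record the access in vcpu->arch.pio and try
 * the in-kernel KVM_PIO_BUS first.  If no kernel device claims the port,
 * describe the access in vcpu->run (KVM_EXIT_IO) so userspace completes it
 * through the shared pio_data page at KVM_PIO_PAGE_OFFSET.
 */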
6091static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
6092 unsigned short port, void *val,
6093 unsigned int count, bool in)
6094{
6095 vcpu->arch.pio.port = port;
6096 vcpu->arch.pio.in = in;
6097 vcpu->arch.pio.count = count;
6098 vcpu->arch.pio.size = size;
6099
6100 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
6101 vcpu->arch.pio.count = 0;
6102 return 1;
6103 }
6104
6105 vcpu->run->exit_reason = KVM_EXIT_IO;
6106 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
6107 vcpu->run->io.size = size;
6108 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
6109 vcpu->run->io.count = count;
6110 vcpu->run->io.port = port;
6111
6112 return 0;
6113}
6114
6115static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
6116 unsigned short port, void *val, unsigned int count)
6117{
6118 int ret;
6119
6120 if (vcpu->arch.pio.count)
6121 goto data_avail;
6122
6123 memset(vcpu->arch.pio_data, 0, size * count);
6124
6125 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
6126 if (ret) {
6127data_avail:
6128 memcpy(val, vcpu->arch.pio_data, size * count);
6129 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
6130 vcpu->arch.pio.count = 0;
6131 return 1;
6132 }
6133
6134 return 0;
6135}
6136
6137static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
6138 int size, unsigned short port, void *val,
6139 unsigned int count)
6140{
6141 return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
6142
6143}
6144
6145static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
6146 unsigned short port, const void *val,
6147 unsigned int count)
6148{
6149 memcpy(vcpu->arch.pio_data, val, size * count);
6150 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
6151 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
6152}
6153
6154static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
6155 int size, unsigned short port,
6156 const void *val, unsigned int count)
6157{
6158 return emulator_pio_out(emul_to_vcpu(ctxt), size, port, val, count);
6159}
6160
6161static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
6162{
6163 return kvm_x86_ops.get_segment_base(vcpu, seg);
6164}
6165
6166static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
6167{
6168 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
6169}
6170
6171static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
6172{
6173 if (!need_emulate_wbinvd(vcpu))
6174 return X86EMUL_CONTINUE;
6175
6176 if (kvm_x86_ops.has_wbinvd_exit()) {
6177 int cpu = get_cpu();
6178
6179 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
6180 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
6181 wbinvd_ipi, NULL, 1);
6182 put_cpu();
6183 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
6184 } else
6185 wbinvd();
6186 return X86EMUL_CONTINUE;
6187}
6188
6189int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
6190{
6191 kvm_emulate_wbinvd_noskip(vcpu);
6192 return kvm_skip_emulated_instruction(vcpu);
6193}
6194EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
6195
6196
6197
6198static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
6199{
6200 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
6201}
6202
6203static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
6204 unsigned long *dest)
6205{
6206 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
6207}
6208
6209static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
6210 unsigned long value)
6211{
6212
6213 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
6214}
6215
6216static u64 mk_cr_64(u64 curr_cr, u32 new_val)
6217{
6218 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
6219}
6220
6221static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
6222{
6223 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6224 unsigned long value;
6225
6226 switch (cr) {
6227 case 0:
6228 value = kvm_read_cr0(vcpu);
6229 break;
6230 case 2:
6231 value = vcpu->arch.cr2;
6232 break;
6233 case 3:
6234 value = kvm_read_cr3(vcpu);
6235 break;
6236 case 4:
6237 value = kvm_read_cr4(vcpu);
6238 break;
6239 case 8:
6240 value = kvm_get_cr8(vcpu);
6241 break;
6242 default:
6243 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6244 return 0;
6245 }
6246
6247 return value;
6248}
6249
6250static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
6251{
6252 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6253 int res = 0;
6254
6255 switch (cr) {
6256 case 0:
6257 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
6258 break;
6259 case 2:
6260 vcpu->arch.cr2 = val;
6261 break;
6262 case 3:
6263 res = kvm_set_cr3(vcpu, val);
6264 break;
6265 case 4:
6266 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
6267 break;
6268 case 8:
6269 res = kvm_set_cr8(vcpu, val);
6270 break;
6271 default:
6272 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6273 res = -1;
6274 }
6275
6276 return res;
6277}
6278
6279static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
6280{
6281 return kvm_x86_ops.get_cpl(emul_to_vcpu(ctxt));
6282}
6283
6284static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6285{
6286 kvm_x86_ops.get_gdt(emul_to_vcpu(ctxt), dt);
6287}
6288
6289static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6290{
6291 kvm_x86_ops.get_idt(emul_to_vcpu(ctxt), dt);
6292}
6293
6294static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6295{
6296 kvm_x86_ops.set_gdt(emul_to_vcpu(ctxt), dt);
6297}
6298
6299static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6300{
6301 kvm_x86_ops.set_idt(emul_to_vcpu(ctxt), dt);
6302}
6303
6304static unsigned long emulator_get_cached_segment_base(
6305 struct x86_emulate_ctxt *ctxt, int seg)
6306{
6307 return get_segment_base(emul_to_vcpu(ctxt), seg);
6308}
6309
6310static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
6311 struct desc_struct *desc, u32 *base3,
6312 int seg)
6313{
6314 struct kvm_segment var;
6315
6316 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
6317 *selector = var.selector;
6318
6319 if (var.unusable) {
6320 memset(desc, 0, sizeof(*desc));
6321 if (base3)
6322 *base3 = 0;
6323 return false;
6324 }
6325
6326 if (var.g)
6327 var.limit >>= 12;
6328 set_desc_limit(desc, var.limit);
6329 set_desc_base(desc, (unsigned long)var.base);
6330#ifdef CONFIG_X86_64
6331 if (base3)
6332 *base3 = var.base >> 32;
6333#endif
6334 desc->type = var.type;
6335 desc->s = var.s;
6336 desc->dpl = var.dpl;
6337 desc->p = var.present;
6338 desc->avl = var.avl;
6339 desc->l = var.l;
6340 desc->d = var.db;
6341 desc->g = var.g;
6342
6343 return true;
6344}
6345
6346static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
6347 struct desc_struct *desc, u32 base3,
6348 int seg)
6349{
6350 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6351 struct kvm_segment var;
6352
6353 var.selector = selector;
6354 var.base = get_desc_base(desc);
6355#ifdef CONFIG_X86_64
6356 var.base |= ((u64)base3) << 32;
6357#endif
6358 var.limit = get_desc_limit(desc);
6359 if (desc->g)
6360 var.limit = (var.limit << 12) | 0xfff;
6361 var.type = desc->type;
6362 var.dpl = desc->dpl;
6363 var.db = desc->d;
6364 var.s = desc->s;
6365 var.l = desc->l;
6366 var.g = desc->g;
6367 var.avl = desc->avl;
6368 var.present = desc->p;
6369 var.unusable = !var.present;
6370 var.padding = 0;
6371
6372 kvm_set_segment(vcpu, &var, seg);
6373 return;
6374}
6375
6376static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
6377 u32 msr_index, u64 *pdata)
6378{
6379 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
6380}
6381
6382static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
6383 u32 msr_index, u64 data)
6384{
6385 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
6386}
6387
6388static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
6389{
6390 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6391
6392 return vcpu->arch.smbase;
6393}
6394
6395static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
6396{
6397 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6398
6399 vcpu->arch.smbase = smbase;
6400}
6401
6402static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
6403 u32 pmc)
6404{
6405 return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
6406}
6407
6408static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
6409 u32 pmc, u64 *pdata)
6410{
6411 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
6412}
6413
6414static void emulator_halt(struct x86_emulate_ctxt *ctxt)
6415{
6416 emul_to_vcpu(ctxt)->arch.halt_request = 1;
6417}
6418
6419static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
6420 struct x86_instruction_info *info,
6421 enum x86_intercept_stage stage)
6422{
6423 return kvm_x86_ops.check_intercept(emul_to_vcpu(ctxt), info, stage,
6424 &ctxt->exception);
6425}
6426
6427static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
6428 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx,
6429 bool exact_only)
6430{
6431 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
6432}
6433
6434static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
6435{
6436 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
6437}
6438
6439static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
6440{
6441 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
6442}
6443
6444static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt)
6445{
6446 return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR);
6447}
6448
6449static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
6450{
6451 return kvm_register_read(emul_to_vcpu(ctxt), reg);
6452}
6453
6454static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
6455{
6456 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
6457}
6458
6459static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
6460{
6461 kvm_x86_ops.set_nmi_mask(emul_to_vcpu(ctxt), masked);
6462}
6463
6464static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
6465{
6466 return emul_to_vcpu(ctxt)->arch.hflags;
6467}
6468
6469static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
6470{
6471 emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
6472}
6473
6474static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
6475 const char *smstate)
6476{
6477 return kvm_x86_ops.pre_leave_smm(emul_to_vcpu(ctxt), smstate);
6478}
6479
6480static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
6481{
6482 kvm_smm_changed(emul_to_vcpu(ctxt));
6483}
6484
6485static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
6486{
6487 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
6488}
6489
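/*
 * Callback table handed to the x86 instruction emulator.  Each hook is a
 * thin wrapper that recovers the vCPU with emul_to_vcpu() and forwards to
 * the corresponding KVM or kvm_x86_ops routine.
 */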
6490static const struct x86_emulate_ops emulate_ops = {
6491 .read_gpr = emulator_read_gpr,
6492 .write_gpr = emulator_write_gpr,
6493 .read_std = emulator_read_std,
6494 .write_std = emulator_write_std,
6495 .read_phys = kvm_read_guest_phys_system,
6496 .fetch = kvm_fetch_guest_virt,
6497 .read_emulated = emulator_read_emulated,
6498 .write_emulated = emulator_write_emulated,
6499 .cmpxchg_emulated = emulator_cmpxchg_emulated,
6500 .invlpg = emulator_invlpg,
6501 .pio_in_emulated = emulator_pio_in_emulated,
6502 .pio_out_emulated = emulator_pio_out_emulated,
6503 .get_segment = emulator_get_segment,
6504 .set_segment = emulator_set_segment,
6505 .get_cached_segment_base = emulator_get_cached_segment_base,
6506 .get_gdt = emulator_get_gdt,
6507 .get_idt = emulator_get_idt,
6508 .set_gdt = emulator_set_gdt,
6509 .set_idt = emulator_set_idt,
6510 .get_cr = emulator_get_cr,
6511 .set_cr = emulator_set_cr,
6512 .cpl = emulator_get_cpl,
6513 .get_dr = emulator_get_dr,
6514 .set_dr = emulator_set_dr,
6515 .get_smbase = emulator_get_smbase,
6516 .set_smbase = emulator_set_smbase,
6517 .set_msr = emulator_set_msr,
6518 .get_msr = emulator_get_msr,
6519 .check_pmc = emulator_check_pmc,
6520 .read_pmc = emulator_read_pmc,
6521 .halt = emulator_halt,
6522 .wbinvd = emulator_wbinvd,
6523 .fix_hypercall = emulator_fix_hypercall,
6524 .intercept = emulator_intercept,
6525 .get_cpuid = emulator_get_cpuid,
6526 .guest_has_long_mode = emulator_guest_has_long_mode,
6527 .guest_has_movbe = emulator_guest_has_movbe,
6528 .guest_has_fxsr = emulator_guest_has_fxsr,
6529 .set_nmi_mask = emulator_set_nmi_mask,
6530 .get_hflags = emulator_get_hflags,
6531 .set_hflags = emulator_set_hflags,
6532 .pre_leave_smm = emulator_pre_leave_smm,
6533 .post_leave_smm = emulator_post_leave_smm,
6534 .set_xcr = emulator_set_xcr,
6535};
6536
6537static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
6538{
6539 u32 int_shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
6540
6541 /*
6542  * An "sti; sti" sequence only blocks interrupts after the first
6543  * instruction, so if the last instruction (emulated or not) left the
6544  * STI shadow in place, that instruction was itself an sti; do not
6545  * re-arm the shadow, or the guest would run again with events blocked.
6546  */
6547 if (int_shadow & mask)
6548 mask = 0;
6549 if (unlikely(int_shadow || mask)) {
6550 kvm_x86_ops.set_interrupt_shadow(vcpu, mask);
6551 if (!mask)
6552 kvm_make_request(KVM_REQ_EVENT, vcpu);
6553 }
6554}
6555
6556static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
6557{
6558 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6559 if (ctxt->exception.vector == PF_VECTOR)
6560 return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception);
6561
6562 if (ctxt->exception.error_code_valid)
6563 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
6564 ctxt->exception.error_code);
6565 else
6566 kvm_queue_exception(vcpu, ctxt->exception.vector);
6567 return false;
6568}
6569
6570static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)
6571{
6572 struct x86_emulate_ctxt *ctxt;
6573
6574 ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
6575 if (!ctxt) {
6576 pr_err("kvm: failed to allocate vcpu's emulator\n");
6577 return NULL;
6578 }
6579
6580 ctxt->vcpu = vcpu;
6581 ctxt->ops = &emulate_ops;
6582 vcpu->arch.emulate_ctxt = ctxt;
6583
6584 return ctxt;
6585}
6586
6587static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
6588{
6589 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6590 int cs_db, cs_l;
6591
6592 kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
6593
6594 ctxt->gpa_available = false;
6595 ctxt->eflags = kvm_get_rflags(vcpu);
6596 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
6597
6598 ctxt->eip = kvm_rip_read(vcpu);
6599 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
6600 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
6601 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
6602 cs_db ? X86EMUL_MODE_PROT32 :
6603 X86EMUL_MODE_PROT16;
6604 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
6605 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
6606 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
6607
6608 init_decode_cache(ctxt);
6609 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6610}
6611
6612void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
6613{
6614 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6615 int ret;
6616
6617 init_emulate_ctxt(vcpu);
6618
6619 ctxt->op_bytes = 2;
6620 ctxt->ad_bytes = 2;
6621 ctxt->_eip = ctxt->eip + inc_eip;
6622 ret = emulate_int_real(ctxt, irq);
6623
6624 if (ret != X86EMUL_CONTINUE) {
6625 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
6626 } else {
6627 ctxt->eip = ctxt->_eip;
6628 kvm_rip_write(vcpu, ctxt->eip);
6629 kvm_set_rflags(vcpu, ctxt->eflags);
6630 }
6631}
6632EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
6633
6634static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
6635{
6636 ++vcpu->stat.insn_emulation_fail;
6637 trace_kvm_emulate_insn_failed(vcpu);
6638
6639 if (emulation_type & EMULTYPE_VMWARE_GP) {
6640 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6641 return 1;
6642 }
6643
6644 if (emulation_type & EMULTYPE_SKIP) {
6645 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6646 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6647 vcpu->run->internal.ndata = 0;
6648 return 0;
6649 }
6650
6651 kvm_queue_exception(vcpu, UD_VECTOR);
6652
6653 if (!is_guest_mode(vcpu) && kvm_x86_ops.get_cpl(vcpu) == 0) {
6654 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6655 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6656 vcpu->run->internal.ndata = 0;
6657 return 0;
6658 }
6659
6660 return 1;
6661}
6662
6663static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
6664 bool write_fault_to_shadow_pgtable,
6665 int emulation_type)
6666{
6667 gpa_t gpa = cr2_or_gpa;
6668 kvm_pfn_t pfn;
6669
6670 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
6671 return false;
6672
6673 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
6674 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
6675 return false;
6676
6677 if (!vcpu->arch.mmu->direct_map) {
 /*
  * With shadow paging, only write accesses need to be emulated here, so
  * translate the GVA assuming a write.
  */
6682 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
6683
 /*
  * If the GVA is not mapped by the guest page tables, let the CPU retry
  * the access and generate the fault itself.
  */
6688 if (gpa == UNMAPPED_GVA)
6689 return true;
6690 }
6691
 /*
  * Do not retry the unhandleable instruction if it faults on read-only
  * host memory; otherwise emulation would be retried forever.
  */
6698 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
6699
 /*
  * If the pfn is an error/no-slot pfn the access cannot be fixed up;
  * report the emulation failure to userspace instead.
  */
6704 if (is_error_noslot_pfn(pfn))
6705 return false;
6706
6707 kvm_release_pfn_clean(pfn);
6708
6709
6710 if (vcpu->arch.mmu->direct_map) {
6711 unsigned int indirect_shadow_pages;
6712
6713 spin_lock(&vcpu->kvm->mmu_lock);
6714 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
6715 spin_unlock(&vcpu->kvm->mmu_lock);
6716
6717 if (indirect_shadow_pages)
6718 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6719
6720 return true;
6721 }
6722
 /*
  * The emulation faulted while the guest was using shadowed page tables:
  * try to unprotect the page and re-enter the guest so the CPU can execute
  * the instruction itself.
  */
6728 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6729
 /*
  * If the write faulted on the instruction's own page tables, unprotecting
  * the shadow page cannot fix it; report the failure to userspace.
  */
6735 return !write_fault_to_shadow_pgtable;
6736}
6737
6738static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
6739 gpa_t cr2_or_gpa, int emulation_type)
6740{
6741 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6742 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
6743
6744 last_retry_eip = vcpu->arch.last_retry_eip;
6745 last_retry_addr = vcpu->arch.last_retry_addr;
6746
 /*
  * If emulation was triggered by a #PF on a shadowed guest page table and
  * the instruction does not itself write page tables, the page can simply
  * be unprotected and the instruction retried natively.  The last retried
  * RIP and fault address are cached so that an instruction which keeps
  * faulting on the page tables that map it is detected and emulated
  * instead of looping forever.
  */
6760 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
6761
6762 if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF))
6763 return false;
6764
6765 if (WARN_ON_ONCE(is_guest_mode(vcpu)) ||
6766 WARN_ON_ONCE(!(emulation_type & EMULTYPE_PF)))
6767 return false;
6768
6769 if (x86_page_table_writing_insn(ctxt))
6770 return false;
6771
6772 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
6773 return false;
6774
6775 vcpu->arch.last_retry_eip = ctxt->eip;
6776 vcpu->arch.last_retry_addr = cr2_or_gpa;
6777
6778 if (!vcpu->arch.mmu->direct_map)
6779 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
6780
6781 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6782
6783 return true;
6784}
6785
6786static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
6787static int complete_emulated_pio(struct kvm_vcpu *vcpu);
6788
6789static void kvm_smm_changed(struct kvm_vcpu *vcpu)
6790{
6791 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
6792
6793 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
6794
6795
6796 kvm_make_request(KVM_REQ_EVENT, vcpu);
6797 }
6798
6799 kvm_mmu_reset_context(vcpu);
6800}
6801
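/*
 * Match @addr and access @type against the four hardware breakpoint slots
 * enabled in @dr7/@db and return the corresponding DR6.B0-B3 bits for every
 * slot that hits.
 */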
6802static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
6803 unsigned long *db)
6804{
6805 u32 dr6 = 0;
6806 int i;
6807 u32 enable, rwlen;
6808
6809 enable = dr7;
6810 rwlen = dr7 >> 16;
6811 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
6812 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
6813 dr6 |= (1 << i);
6814 return dr6;
6815}
6816
6817static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
6818{
6819 struct kvm_run *kvm_run = vcpu->run;
6820
6821 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
6822 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
6823 kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
6824 kvm_run->debug.arch.exception = DB_VECTOR;
6825 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6826 return 0;
6827 }
6828 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
6829 return 1;
6830}
6831
6832int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
6833{
6834 unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
6835 int r;
6836
6837 r = kvm_x86_ops.skip_emulated_instruction(vcpu);
6838 if (unlikely(!r))
6839 return 0;
6840
 /*
  * rflags is the old, "raw" value of the flags; the skip above has not
  * saved the new value yet.  Using it is correct even when the guest just
  * set TF, because the CPU does not raise #DB after the instruction that
  * sets the TF flag.
  */
6849 if (unlikely(rflags & X86_EFLAGS_TF))
6850 r = kvm_vcpu_do_singlestep(vcpu);
6851 return r;
6852}
6853EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
6854
6855static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
6856{
6857 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
6858 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
6859 struct kvm_run *kvm_run = vcpu->run;
6860 unsigned long eip = kvm_get_linear_rip(vcpu);
6861 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6862 vcpu->arch.guest_debug_dr7,
6863 vcpu->arch.eff_db);
6864
6865 if (dr6 != 0) {
6866 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
6867 kvm_run->debug.arch.pc = eip;
6868 kvm_run->debug.arch.exception = DB_VECTOR;
6869 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6870 *r = 0;
6871 return true;
6872 }
6873 }
6874
6875 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
6876 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
6877 unsigned long eip = kvm_get_linear_rip(vcpu);
6878 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6879 vcpu->arch.dr7,
6880 vcpu->arch.db);
6881
6882 if (dr6 != 0) {
6883 kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
6884 *r = 1;
6885 return true;
6886 }
6887 }
6888
6889 return false;
6890}
6891
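/*
 * Only the VMware backdoor opcodes may be emulated when EMULTYPE_VMWARE_GP
 * is set: port I/O (IN/OUT/INS/OUTS) and RDPMC (0x0f 0x33).
 */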
6892static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
6893{
6894 switch (ctxt->opcode_len) {
6895 case 1:
6896 switch (ctxt->b) {
6897 case 0xe4:
6898 case 0xe5:
6899 case 0xec:
6900 case 0xed:
6901 case 0xe6:
6902 case 0xe7:
6903 case 0xee:
6904 case 0xef:
6905 case 0x6c:
6906 case 0x6d:
6907 case 0x6e:
6908 case 0x6f:
6909 return true;
6910 }
6911 break;
6912 case 2:
6913 switch (ctxt->b) {
6914 case 0x33:
6915 return true;
6916 }
6917 break;
6918 }
6919
6920 return false;
6921}
6922
6923int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
6924 int emulation_type, void *insn, int insn_len)
6925{
6926 int r;
6927 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
6928 bool writeback = true;
6929 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
6930
6931 vcpu->arch.l1tf_flush_l1d = true;
6932
 /*
  * Clear write_fault_to_shadow_pgtable here to ensure a stale value from a
  * previous emulation is never reused.
  */
6937 vcpu->arch.write_fault_to_shadow_pgtable = false;
6938 kvm_clear_exception_queue(vcpu);
6939
6940 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
6941 init_emulate_ctxt(vcpu);
6942
 /*
  * Check for code breakpoints before decoding: an instruction breakpoint
  * has priority over the instruction itself, and hitting one either exits
  * to userspace or injects #DB without emulating anything.
  */
6949 if (!(emulation_type & EMULTYPE_SKIP) &&
6950 kvm_vcpu_check_breakpoint(vcpu, &r))
6951 return r;
6952
6953 ctxt->interruptibility = 0;
6954 ctxt->have_exception = false;
6955 ctxt->exception.vector = -1;
6956 ctxt->perm_ok = false;
6957
6958 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
6959
6960 r = x86_decode_insn(ctxt, insn, insn_len);
6961
6962 trace_kvm_emulate_insn_start(vcpu);
6963 ++vcpu->stat.insn_emulation;
6964 if (r != EMULATION_OK) {
6965 if ((emulation_type & EMULTYPE_TRAP_UD) ||
6966 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
6967 kvm_queue_exception(vcpu, UD_VECTOR);
6968 return 1;
6969 }
6970 if (reexecute_instruction(vcpu, cr2_or_gpa,
6971 write_fault_to_spt,
6972 emulation_type))
6973 return 1;
6974 if (ctxt->have_exception) {
 /*
  * Decode should never queue #UD (that case is EMULATION_FAILED) nor a
  * trap-like exception; warn if it somehow did.
  */
6979 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
6980 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
6981 inject_emulated_exception(vcpu);
6982 return 1;
6983 }
6984 return handle_emulation_failure(vcpu, emulation_type);
6985 }
6986 }
6987
6988 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
6989 !is_vmware_backdoor_opcode(ctxt)) {
6990 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6991 return 1;
6992 }
6993
6994
 /*
  * EMULTYPE_SKIP is intended for use only by the vendor callbacks behind
  * kvm_skip_emulated_instruction(); the caller remains responsible for
  * updating interruptibility state and injecting single-step #DBs.
  */
6999 if (emulation_type & EMULTYPE_SKIP) {
7000 kvm_rip_write(vcpu, ctxt->_eip);
7001 if (ctxt->eflags & X86_EFLAGS_RF)
7002 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
7003 return 1;
7004 }
7005
7006 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
7007 return 1;
7008
 /*
  * The VMware backdoor changes register values during I/O emulation, so
  * invalidate the emulator's cache and re-read the registers from the vCPU.
  */
7011 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
7012 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
7013 emulator_invalidate_register_cache(ctxt);
7014 }
7015
7016restart:
7017 if (emulation_type & EMULTYPE_PF) {
7018
7019 ctxt->exception.address = cr2_or_gpa;
7020
7021
7022 if (vcpu->arch.mmu->direct_map) {
7023 ctxt->gpa_available = true;
7024 ctxt->gpa_val = cr2_or_gpa;
7025 }
7026 } else {
7027
7028 ctxt->exception.address = 0;
7029 }
7030
7031 r = x86_emulate_insn(ctxt);
7032
7033 if (r == EMULATION_INTERCEPTED)
7034 return 1;
7035
7036 if (r == EMULATION_FAILED) {
7037 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
7038 emulation_type))
7039 return 1;
7040
7041 return handle_emulation_failure(vcpu, emulation_type);
7042 }
7043
7044 if (ctxt->have_exception) {
7045 r = 1;
7046 if (inject_emulated_exception(vcpu))
7047 return r;
7048 } else if (vcpu->arch.pio.count) {
7049 if (!vcpu->arch.pio.in) {
7050
7051 vcpu->arch.pio.count = 0;
7052 } else {
7053 writeback = false;
7054 vcpu->arch.complete_userspace_io = complete_emulated_pio;
7055 }
7056 r = 0;
7057 } else if (vcpu->mmio_needed) {
7058 ++vcpu->stat.mmio_exits;
7059
7060 if (!vcpu->mmio_is_write)
7061 writeback = false;
7062 r = 0;
7063 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
7064 } else if (r == EMULATION_RESTART)
7065 goto restart;
7066 else
7067 r = 1;
7068
7069 if (writeback) {
7070 unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
7071 toggle_interruptibility(vcpu, ctxt->interruptibility);
7072 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7073 if (!ctxt->have_exception ||
7074 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
7075 kvm_rip_write(vcpu, ctxt->eip);
7076 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
7077 r = kvm_vcpu_do_singlestep(vcpu);
7078 if (kvm_x86_ops.update_emulated_instruction)
7079 kvm_x86_ops.update_emulated_instruction(vcpu);
7080 __kvm_set_rflags(vcpu, ctxt->eflags);
7081 }
7082
 /*
  * For STI the interrupt is shadowed, so KVM_REQ_EVENT will do nothing
  * here and will be requested again once the shadow expires; the check is
  * still needed because POPF has no interrupt shadow.
  */
7089 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
7090 kvm_make_request(KVM_REQ_EVENT, vcpu);
7091 } else
7092 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
7093
7094 return r;
7095}
7096
7097int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
7098{
7099 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
7100}
7101EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
7102
7103int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
7104 void *insn, int insn_len)
7105{
7106 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
7107}
7108EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
7109
7110static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
7111{
7112 vcpu->arch.pio.count = 0;
7113 return 1;
7114}
7115
7116static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
7117{
7118 vcpu->arch.pio.count = 0;
7119
7120 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
7121 return 1;
7122
7123 return kvm_skip_emulated_instruction(vcpu);
7124}
7125
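/*
 * Fast path for a single OUT to @port.  A nonzero return means the port was
 * handled in-kernel and the caller can skip the instruction immediately;
 * otherwise the vCPU exits to userspace and RIP is advanced only once the
 * I/O completes (subject to the OUT 0x7e quirk below).
 */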
7126static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
7127 unsigned short port)
7128{
7129 unsigned long val = kvm_rax_read(vcpu);
7130 int ret = emulator_pio_out(vcpu, size, port, &val, 1);
7131
7132 if (ret)
7133 return ret;
7134
 /*
  * Work around userspace that relies on the old KVM behaviour of %rip
  * being incremented prior to exiting to userspace for an "out 0x7e".
  */
7139 if (port == 0x7e &&
7140 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
7141 vcpu->arch.complete_userspace_io =
7142 complete_fast_pio_out_port_0x7e;
7143 kvm_skip_emulated_instruction(vcpu);
7144 } else {
7145 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
7146 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
7147 }
7148 return 0;
7149}
7150
7151static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
7152{
7153 unsigned long val;
7154
7155
7156 BUG_ON(vcpu->arch.pio.count != 1);
7157
7158 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
7159 vcpu->arch.pio.count = 0;
7160 return 1;
7161 }
7162
7163
7164 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
7165
 /*
  * Since pio.count is still 1, emulator_pio_in() performs the actual copy
  * from the PIO buffer (and the tracing) on our behalf.
  */
7170 emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
7171 kvm_rax_write(vcpu, val);
7172
7173 return kvm_skip_emulated_instruction(vcpu);
7174}
7175
7176static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
7177 unsigned short port)
7178{
7179 unsigned long val;
7180 int ret;
7181
7182
7183 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
7184
7185 ret = emulator_pio_in(vcpu, size, port, &val, 1);
7186 if (ret) {
7187 kvm_rax_write(vcpu, val);
7188 return ret;
7189 }
7190
7191 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
7192 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
7193
7194 return 0;
7195}
7196
7197int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
7198{
7199 int ret;
7200
7201 if (in)
7202 ret = kvm_fast_pio_in(vcpu, size, port);
7203 else
7204 ret = kvm_fast_pio_out(vcpu, size, port);
7205 return ret && kvm_skip_emulated_instruction(vcpu);
7206}
7207EXPORT_SYMBOL_GPL(kvm_fast_pio);
7208
7209static int kvmclock_cpu_down_prep(unsigned int cpu)
7210{
7211 __this_cpu_write(cpu_tsc_khz, 0);
7212 return 0;
7213}
7214
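/*
 * Refresh this CPU's cached TSC frequency, either from a cpufreq transition
 * (@data) or, lacking that, from cpufreq/tsc_khz directly.
 */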
7215static void tsc_khz_changed(void *data)
7216{
7217 struct cpufreq_freqs *freq = data;
7218 unsigned long khz = 0;
7219
7220 if (data)
7221 khz = freq->new;
7222 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7223 khz = cpufreq_quick_get(raw_smp_processor_id());
7224 if (!khz)
7225 khz = tsc_khz;
7226 __this_cpu_write(cpu_tsc_khz, khz);
7227}
7228
7229#ifdef CONFIG_X86_64
7230static void kvm_hyperv_tsc_notifier(void)
7231{
7232 struct kvm *kvm;
7233 struct kvm_vcpu *vcpu;
7234 int cpu;
7235
7236 mutex_lock(&kvm_lock);
7237 list_for_each_entry(kvm, &vm_list, vm_list)
7238 kvm_make_mclock_inprogress_request(kvm);
7239
7240 hyperv_stop_tsc_emulation();
7241
7242
7243 for_each_present_cpu(cpu)
7244 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
7245 kvm_max_guest_tsc_khz = tsc_khz;
7246
7247 list_for_each_entry(kvm, &vm_list, vm_list) {
7248 struct kvm_arch *ka = &kvm->arch;
7249
7250 spin_lock(&ka->pvclock_gtod_sync_lock);
7251
7252 pvclock_update_vm_gtod_copy(kvm);
7253
7254 kvm_for_each_vcpu(cpu, vcpu, kvm)
7255 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7256
7257 kvm_for_each_vcpu(cpu, vcpu, kvm)
7258 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
7259
7260 spin_unlock(&ka->pvclock_gtod_sync_lock);
7261 }
7262 mutex_unlock(&kvm_lock);
7263}
7264#endif
7265
7266static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
7267{
7268 struct kvm *kvm;
7269 struct kvm_vcpu *vcpu;
7270 int i, send_ipi = 0;
7271
 /*
  * kvmclock derives guest time from the host TSC frequency.  On hosts
  * without a constant TSC, a cpufreq transition changes that frequency, so
  * the per-cpu cached TSC khz must be refreshed on the affected CPU and
  * every vCPU resident there must recompute its kvmclock parameters before
  * it next enters the guest; otherwise the guest would observe time
  * running at the wrong rate.
  */
7311 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7312
7313 mutex_lock(&kvm_lock);
7314 list_for_each_entry(kvm, &vm_list, vm_list) {
7315 kvm_for_each_vcpu(i, vcpu, kvm) {
7316 if (vcpu->cpu != cpu)
7317 continue;
7318 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7319 if (vcpu->cpu != raw_smp_processor_id())
7320 send_ipi = 1;
7321 }
7322 }
7323 mutex_unlock(&kvm_lock);
7324
7325 if (freq->old < freq->new && send_ipi) {
 /*
  * The frequency went up.  A vCPU currently running on the affected CPU
  * must not keep using the old, slower kvmclock parameters or the guest
  * could see time go backwards, so kick it out of guest mode; kvmclock is
  * then refreshed before the next entry.
  */
7338 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7339 }
7340}
7341
7342static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
7343 void *data)
7344{
7345 struct cpufreq_freqs *freq = data;
7346 int cpu;
7347
7348 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
7349 return 0;
7350 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
7351 return 0;
7352
7353 for_each_cpu(cpu, freq->policy->cpus)
7354 __kvmclock_cpufreq_notifier(freq, cpu);
7355
7356 return 0;
7357}
7358
7359static struct notifier_block kvmclock_cpufreq_notifier_block = {
7360 .notifier_call = kvmclock_cpufreq_notifier
7361};
7362
7363static int kvmclock_cpu_online(unsigned int cpu)
7364{
7365 tsc_khz_changed(NULL);
7366 return 0;
7367}
7368
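/*
 * Record the maximum host TSC frequency and register the cpufreq and CPU
 * hotplug notifiers that keep the per-cpu TSC khz cache up to date.
 */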
7369static void kvm_timer_init(void)
7370{
7371 max_tsc_khz = tsc_khz;
7372
7373 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
7374#ifdef CONFIG_CPU_FREQ
7375 struct cpufreq_policy *policy;
7376 int cpu;
7377
7378 cpu = get_cpu();
7379 policy = cpufreq_cpu_get(cpu);
7380 if (policy) {
7381 if (policy->cpuinfo.max_freq)
7382 max_tsc_khz = policy->cpuinfo.max_freq;
7383 cpufreq_cpu_put(policy);
7384 }
7385 put_cpu();
7386#endif
7387 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
7388 CPUFREQ_TRANSITION_NOTIFIER);
7389 }
7390
7391 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
7392 kvmclock_cpu_online, kvmclock_cpu_down_prep);
7393}
7394
7395DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
7396EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
7397
7398int kvm_is_in_guest(void)
7399{
7400 return __this_cpu_read(current_vcpu) != NULL;
7401}
7402
7403static int kvm_is_user_mode(void)
7404{
7405 int user_mode = 3;
7406
7407 if (__this_cpu_read(current_vcpu))
7408 user_mode = kvm_x86_ops.get_cpl(__this_cpu_read(current_vcpu));
7409
7410 return user_mode != 0;
7411}
7412
7413static unsigned long kvm_get_guest_ip(void)
7414{
7415 unsigned long ip = 0;
7416
7417 if (__this_cpu_read(current_vcpu))
7418 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
7419
7420 return ip;
7421}
7422
7423static void kvm_handle_intel_pt_intr(void)
7424{
7425 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
7426
7427 kvm_make_request(KVM_REQ_PMI, vcpu);
7428 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
7429 (unsigned long *)&vcpu->arch.pmu.global_status);
7430}
7431
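/*
 * Callbacks used by perf to attribute PMI samples taken while a vCPU is
 * running to guest context, and to forward Intel PT PMIs to the vCPU's PMU.
 */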
7432static struct perf_guest_info_callbacks kvm_guest_cbs = {
7433 .is_in_guest = kvm_is_in_guest,
7434 .is_user_mode = kvm_is_user_mode,
7435 .get_guest_ip = kvm_get_guest_ip,
7436 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
7437};
7438
7439#ifdef CONFIG_X86_64
7440static void pvclock_gtod_update_fn(struct work_struct *work)
7441{
7442 struct kvm *kvm;
7443
7444 struct kvm_vcpu *vcpu;
7445 int i;
7446
7447 mutex_lock(&kvm_lock);
7448 list_for_each_entry(kvm, &vm_list, vm_list)
7449 kvm_for_each_vcpu(i, vcpu, kvm)
7450 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7451 atomic_set(&kvm_guest_has_master_clock, 0);
7452 mutex_unlock(&kvm_lock);
7453}
7454
7455static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
7456
/*
 * Notification from the timekeeping core about an update to the host
 * clocksource / pvclock gtod data.
 */
7460static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
7461 void *priv)
7462{
7463 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
7464 struct timekeeper *tk = priv;
7465
7466 update_pvclock_gtod(tk);
7467
 /*
  * Disable the master clock if the host clocksource is no longer TSC
  * based, i.e. can no longer be trusted for kvmclock.
  */
7471 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
7472 atomic_read(&kvm_guest_has_master_clock) != 0)
7473 queue_work(system_long_wq, &pvclock_gtod_work);
7474
7475 return 0;
7476}
7477
7478static struct notifier_block pvclock_gtod_notifier = {
7479 .notifier_call = pvclock_gtod_notify,
7480};
7481#endif
7482
7483int kvm_arch_init(void *opaque)
7484{
7485 struct kvm_x86_init_ops *ops = opaque;
7486 int r;
7487
7488 if (kvm_x86_ops.hardware_enable) {
7489 printk(KERN_ERR "kvm: already loaded the other module\n");
7490 r = -EEXIST;
7491 goto out;
7492 }
7493
7494 if (!ops->cpu_has_kvm_support()) {
7495 pr_err_ratelimited("kvm: no hardware support\n");
7496 r = -EOPNOTSUPP;
7497 goto out;
7498 }
7499 if (ops->disabled_by_bios()) {
7500 pr_err_ratelimited("kvm: disabled by bios\n");
7501 r = -EOPNOTSUPP;
7502 goto out;
7503 }
7504
 /*
  * KVM assumes the host supports FPU and FXSAVE/FXRSTOR; for example the
  * KVM_GET_FPU/KVM_SET_FPU ioctls operate directly on the FXSAVE image.
  */
7510 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
7511 printk(KERN_ERR "kvm: inadequate fpu\n");
7512 r = -EOPNOTSUPP;
7513 goto out;
7514 }
7515
7516 r = -ENOMEM;
7517 x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
7518 __alignof__(struct fpu), SLAB_ACCOUNT,
7519 NULL);
7520 if (!x86_fpu_cache) {
7521 printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
7522 goto out;
7523 }
7524
7525 x86_emulator_cache = kvm_alloc_emulator_cache();
7526 if (!x86_emulator_cache) {
7527 pr_err("kvm: failed to allocate cache for x86 emulator\n");
7528 goto out_free_x86_fpu_cache;
7529 }
7530
7531 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
7532 if (!shared_msrs) {
7533 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
7534 goto out_free_x86_emulator_cache;
7535 }
7536
7537 r = kvm_mmu_module_init();
7538 if (r)
7539 goto out_free_percpu;
7540
7541 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
7542 PT_DIRTY_MASK, PT64_NX_MASK, 0,
7543 PT_PRESENT_MASK, 0, sme_me_mask);
7544 kvm_timer_init();
7545
7546 perf_register_guest_info_callbacks(&kvm_guest_cbs);
7547
7548 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
7549 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
7550 supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
7551 }
7552
7553 kvm_lapic_init();
7554 if (pi_inject_timer == -1)
7555 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
7556#ifdef CONFIG_X86_64
7557 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
7558
7559 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7560 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
7561#endif
7562
7563 return 0;
7564
7565out_free_percpu:
7566 free_percpu(shared_msrs);
7567out_free_x86_emulator_cache:
7568 kmem_cache_destroy(x86_emulator_cache);
7569out_free_x86_fpu_cache:
7570 kmem_cache_destroy(x86_fpu_cache);
7571out:
7572 return r;
7573}
7574
7575void kvm_arch_exit(void)
7576{
7577#ifdef CONFIG_X86_64
7578 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7579 clear_hv_tscchange_cb();
7580#endif
7581 kvm_lapic_exit();
7582 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
7583
7584 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7585 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
7586 CPUFREQ_TRANSITION_NOTIFIER);
7587 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
7588#ifdef CONFIG_X86_64
7589 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
7590#endif
7591 kvm_x86_ops.hardware_enable = NULL;
7592 kvm_mmu_module_exit();
7593 free_percpu(shared_msrs);
7594 kmem_cache_destroy(x86_fpu_cache);
7595}
7596
7597int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
7598{
7599 ++vcpu->stat.halt_exits;
7600 if (lapic_in_kernel(vcpu)) {
7601 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
7602 return 1;
7603 } else {
7604 vcpu->run->exit_reason = KVM_EXIT_HLT;
7605 return 0;
7606 }
7607}
7608EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
7609
7610int kvm_emulate_halt(struct kvm_vcpu *vcpu)
7611{
7612 int ret = kvm_skip_emulated_instruction(vcpu);
7613
 /*
  * TODO: a GUESTDBG_SINGLESTEP-triggered KVM_EXIT_DEBUG from the skip
  * above may be squashed by the halt exit below.
  */
7617 return kvm_vcpu_halt(vcpu) && ret;
7618}
7619EXPORT_SYMBOL_GPL(kvm_emulate_halt);
7620
7621#ifdef CONFIG_X86_64
7622static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
7623 unsigned long clock_type)
7624{
7625 struct kvm_clock_pairing clock_pairing;
7626 struct timespec64 ts;
7627 u64 cycle;
7628 int ret;
7629
7630 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
7631 return -KVM_EOPNOTSUPP;
7632
7633 if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
7634 return -KVM_EOPNOTSUPP;
7635
7636 clock_pairing.sec = ts.tv_sec;
7637 clock_pairing.nsec = ts.tv_nsec;
7638 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
7639 clock_pairing.flags = 0;
7640 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
7641
7642 ret = 0;
7643 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
7644 sizeof(struct kvm_clock_pairing)))
7645 ret = -KVM_EFAULT;
7646
7647 return ret;
7648}
7649#endif
7650
/*
 * kvm_pv_kick_cpu_op: kick (wake up) a vCPU.
 *
 * @apicid - APIC ID of the vCPU to be kicked.
 */
7656static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
7657{
7658 struct kvm_lapic_irq lapic_irq;
7659
7660 lapic_irq.shorthand = APIC_DEST_NOSHORT;
7661 lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
7662 lapic_irq.level = 0;
7663 lapic_irq.dest_id = apicid;
7664 lapic_irq.msi_redir_hint = false;
7665
7666 lapic_irq.delivery_mode = APIC_DM_REMRD;
7667 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
7668}
7669
7670bool kvm_apicv_activated(struct kvm *kvm)
7671{
7672 return (READ_ONCE(kvm->arch.apicv_inhibit_reasons) == 0);
7673}
7674EXPORT_SYMBOL_GPL(kvm_apicv_activated);
7675
7676void kvm_apicv_init(struct kvm *kvm, bool enable)
7677{
7678 if (enable)
7679 clear_bit(APICV_INHIBIT_REASON_DISABLE,
7680 &kvm->arch.apicv_inhibit_reasons);
7681 else
7682 set_bit(APICV_INHIBIT_REASON_DISABLE,
7683 &kvm->arch.apicv_inhibit_reasons);
7684}
7685EXPORT_SYMBOL_GPL(kvm_apicv_init);
7686
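/*
 * Attempt a directed yield to the vCPU owning APIC ID @dest_id, used by the
 * PV kick and sched_yield hypercalls so the boost goes to the intended
 * target instead of an arbitrary vCPU.
 */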
7687static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
7688{
7689 struct kvm_vcpu *target = NULL;
7690 struct kvm_apic_map *map;
7691
7692 rcu_read_lock();
7693 map = rcu_dereference(kvm->arch.apic_map);
7694
7695 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
7696 target = map->phys_map[dest_id]->vcpu;
7697
7698 rcu_read_unlock();
7699
7700 if (target && READ_ONCE(target->ready))
7701 kvm_vcpu_yield_to(target);
7702}
7703
7704int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
7705{
7706 unsigned long nr, a0, a1, a2, a3, ret;
7707 int op_64_bit;
7708
7709 if (kvm_hv_hypercall_enabled(vcpu->kvm))
7710 return kvm_hv_hypercall(vcpu);
7711
7712 nr = kvm_rax_read(vcpu);
7713 a0 = kvm_rbx_read(vcpu);
7714 a1 = kvm_rcx_read(vcpu);
7715 a2 = kvm_rdx_read(vcpu);
7716 a3 = kvm_rsi_read(vcpu);
7717
7718 trace_kvm_hypercall(nr, a0, a1, a2, a3);
7719
7720 op_64_bit = is_64_bit_mode(vcpu);
7721 if (!op_64_bit) {
7722 nr &= 0xFFFFFFFF;
7723 a0 &= 0xFFFFFFFF;
7724 a1 &= 0xFFFFFFFF;
7725 a2 &= 0xFFFFFFFF;
7726 a3 &= 0xFFFFFFFF;
7727 }
7728
7729 if (kvm_x86_ops.get_cpl(vcpu) != 0) {
7730 ret = -KVM_EPERM;
7731 goto out;
7732 }
7733
7734 switch (nr) {
7735 case KVM_HC_VAPIC_POLL_IRQ:
7736 ret = 0;
7737 break;
7738 case KVM_HC_KICK_CPU:
7739 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
7740 kvm_sched_yield(vcpu->kvm, a1);
7741 ret = 0;
7742 break;
7743#ifdef CONFIG_X86_64
7744 case KVM_HC_CLOCK_PAIRING:
7745 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
7746 break;
7747#endif
7748 case KVM_HC_SEND_IPI:
7749 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
7750 break;
7751 case KVM_HC_SCHED_YIELD:
7752 kvm_sched_yield(vcpu->kvm, a0);
7753 ret = 0;
7754 break;
7755 default:
7756 ret = -KVM_ENOSYS;
7757 break;
7758 }
7759out:
7760 if (!op_64_bit)
7761 ret = (u32)ret;
7762 kvm_rax_write(vcpu, ret);
7763
7764 ++vcpu->stat.hypercalls;
7765 return kvm_skip_emulated_instruction(vcpu);
7766}
7767EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
7768
7769static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
7770{
7771 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7772 char instruction[3];
7773 unsigned long rip = kvm_rip_read(vcpu);
7774
7775 kvm_x86_ops.patch_hypercall(vcpu, instruction);
7776
7777 return emulator_write_emulated(ctxt, rip, instruction, 3,
7778 &ctxt->exception);
7779}
7780
7781static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
7782{
7783 return vcpu->run->request_interrupt_window &&
7784 likely(!pic_in_kernel(vcpu->kvm));
7785}
7786
7787static void post_kvm_run_save(struct kvm_vcpu *vcpu)
7788{
7789 struct kvm_run *kvm_run = vcpu->run;
7790
7791 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
7792 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
7793 kvm_run->cr8 = kvm_get_cr8(vcpu);
7794 kvm_run->apic_base = kvm_get_apic_base(vcpu);
7795 kvm_run->ready_for_interrupt_injection =
7796 pic_in_kernel(vcpu->kvm) ||
7797 kvm_vcpu_ready_for_interrupt_injection(vcpu);
7798}
7799
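/*
 * Tell the vendor code the current TPR and the priority of the highest
 * pending interrupt in the in-kernel APIC, so it can decide when guest
 * CR8/TPR updates need to trap.
 */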
7800static void update_cr8_intercept(struct kvm_vcpu *vcpu)
7801{
7802 int max_irr, tpr;
7803
7804 if (!kvm_x86_ops.update_cr8_intercept)
7805 return;
7806
7807 if (!lapic_in_kernel(vcpu))
7808 return;
7809
7810 if (vcpu->arch.apicv_active)
7811 return;
7812
7813 if (!vcpu->arch.apic->vapic_addr)
7814 max_irr = kvm_lapic_find_highest_irr(vcpu);
7815 else
7816 max_irr = -1;
7817
7818 if (max_irr != -1)
7819 max_irr >>= 4;
7820
7821 tpr = kvm_lapic_get_cr8(vcpu);
7822
7823 kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
7824}
7825
7826static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
7827{
7828 int r;
7829 bool can_inject = true;
7830
 /* First, re-inject any event that was delivered but not yet completed. */
7833 if (vcpu->arch.exception.injected) {
7834 kvm_x86_ops.queue_exception(vcpu);
7835 can_inject = false;
7836 }
7837
 /*
  * Do not re-inject an NMI or interrupt while an exception is pending:
  * events are recognized at instruction boundaries, and the pending
  * exception must be resolved before a new asynchronous event can be
  * taken, so leave the NMI/interrupt pending for the next boundary.
  */
7851 else if (!vcpu->arch.exception.pending) {
7852 if (vcpu->arch.nmi_injected) {
7853 kvm_x86_ops.set_nmi(vcpu);
7854 can_inject = false;
7855 } else if (vcpu->arch.interrupt.injected) {
7856 kvm_x86_ops.set_irq(vcpu);
7857 can_inject = false;
7858 }
7859 }
7860
7861 WARN_ON_ONCE(vcpu->arch.exception.injected &&
7862 vcpu->arch.exception.pending);
7863
 /*
  * Process nested (L1) events first: a nested VM-Exit supersedes injection
  * into L2, and a negative return means injection has to wait, which is
  * handled by requesting an immediate exit.
  */
7870 if (is_guest_mode(vcpu)) {
7871 r = kvm_x86_ops.nested_ops->check_events(vcpu);
7872 if (r < 0)
7873 goto busy;
7874 }
7875
7876
7877 if (vcpu->arch.exception.pending) {
7878 trace_kvm_inj_exception(vcpu->arch.exception.nr,
7879 vcpu->arch.exception.has_error_code,
7880 vcpu->arch.exception.error_code);
7881
7882 vcpu->arch.exception.pending = false;
7883 vcpu->arch.exception.injected = true;
7884
7885 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
7886 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
7887 X86_EFLAGS_RF);
7888
7889 if (vcpu->arch.exception.nr == DB_VECTOR) {
7890 kvm_deliver_exception_payload(vcpu);
7891 if (vcpu->arch.dr7 & DR7_GD) {
7892 vcpu->arch.dr7 &= ~DR7_GD;
7893 kvm_update_dr7(vcpu);
7894 }
7895 }
7896
7897 kvm_x86_ops.queue_exception(vcpu);
7898 can_inject = false;
7899 }
7900
 /*
  * Inject new events in priority order: SMI, then NMI, then IRQ.  For each
  * source, a negative return from the vendor "allowed" check means
  * injection must wait (request an immediate exit); zero means the window
  * is closed and the corresponding *_window exit is enabled instead.
  */
7912 if (vcpu->arch.smi_pending) {
7913 r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY;
7914 if (r < 0)
7915 goto busy;
7916 if (r) {
7917 vcpu->arch.smi_pending = false;
7918 ++vcpu->arch.smi_count;
7919 enter_smm(vcpu);
7920 can_inject = false;
7921 } else
7922 kvm_x86_ops.enable_smi_window(vcpu);
7923 }
7924
7925 if (vcpu->arch.nmi_pending) {
7926 r = can_inject ? kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY;
7927 if (r < 0)
7928 goto busy;
7929 if (r) {
7930 --vcpu->arch.nmi_pending;
7931 vcpu->arch.nmi_injected = true;
7932 kvm_x86_ops.set_nmi(vcpu);
7933 can_inject = false;
7934 WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0);
7935 }
7936 if (vcpu->arch.nmi_pending)
7937 kvm_x86_ops.enable_nmi_window(vcpu);
7938 }
7939
7940 if (kvm_cpu_has_injectable_intr(vcpu)) {
7941 r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY;
7942 if (r < 0)
7943 goto busy;
7944 if (r) {
7945 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
7946 kvm_x86_ops.set_irq(vcpu);
7947 WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0);
7948 }
7949 if (kvm_cpu_has_injectable_intr(vcpu))
7950 kvm_x86_ops.enable_irq_window(vcpu);
7951 }
7952
7953 if (is_guest_mode(vcpu) &&
7954 kvm_x86_ops.nested_ops->hv_timer_pending &&
7955 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
7956 *req_immediate_exit = true;
7957
7958 WARN_ON(vcpu->arch.exception.pending);
7959 return;
7960
7961busy:
7962 *req_immediate_exit = true;
7963 return;
7964}
7965
7966static void process_nmi(struct kvm_vcpu *vcpu)
7967{
7968 unsigned limit = 2;
7969
 /*
  * x86 allows one NMI to be in service and at most one more to be pending.
  * If an NMI is already masked or injected, limit further queued NMIs to a
  * single one.
  */
7975 if (kvm_x86_ops.get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
7976 limit = 1;
7977
7978 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
7979 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
7980 kvm_make_request(KVM_REQ_EVENT, vcpu);
7981}
7982
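/*
 * Pack a kvm_segment's attributes into the flags word layout used by the
 * 32-bit and 64-bit SMRAM state-save formats below.
 */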
7983static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
7984{
7985 u32 flags = 0;
7986 flags |= seg->g << 23;
7987 flags |= seg->db << 22;
7988 flags |= seg->l << 21;
7989 flags |= seg->avl << 20;
7990 flags |= seg->present << 15;
7991 flags |= seg->dpl << 13;
7992 flags |= seg->s << 12;
7993 flags |= seg->type << 8;
7994 return flags;
7995}
7996
7997static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
7998{
7999 struct kvm_segment seg;
8000 int offset;
8001
8002 kvm_get_segment(vcpu, &seg, n);
8003 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
8004
8005 if (n < 3)
8006 offset = 0x7f84 + n * 12;
8007 else
8008 offset = 0x7f2c + (n - 3) * 12;
8009
8010 put_smstate(u32, buf, offset + 8, seg.base);
8011 put_smstate(u32, buf, offset + 4, seg.limit);
8012 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
8013}
8014
8015#ifdef CONFIG_X86_64
8016static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
8017{
8018 struct kvm_segment seg;
8019 int offset;
8020 u16 flags;
8021
8022 kvm_get_segment(vcpu, &seg, n);
8023 offset = 0x7e00 + n * 16;
8024
8025 flags = enter_smm_get_segment_flags(&seg) >> 8;
8026 put_smstate(u16, buf, offset, seg.selector);
8027 put_smstate(u16, buf, offset + 2, flags);
8028 put_smstate(u32, buf, offset + 4, seg.limit);
8029 put_smstate(u64, buf, offset + 8, seg.base);
8030}
8031#endif
8032
8033static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
8034{
8035 struct desc_ptr dt;
8036 struct kvm_segment seg;
8037 unsigned long val;
8038 int i;
8039
8040 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
8041 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
8042 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
8043 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
8044
8045 for (i = 0; i < 8; i++)
8046 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
8047
8048 kvm_get_dr(vcpu, 6, &val);
8049 put_smstate(u32, buf, 0x7fcc, (u32)val);
8050 kvm_get_dr(vcpu, 7, &val);
8051 put_smstate(u32, buf, 0x7fc8, (u32)val);
8052
8053 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
8054 put_smstate(u32, buf, 0x7fc4, seg.selector);
8055 put_smstate(u32, buf, 0x7f64, seg.base);
8056 put_smstate(u32, buf, 0x7f60, seg.limit);
8057 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
8058
8059 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
8060 put_smstate(u32, buf, 0x7fc0, seg.selector);
8061 put_smstate(u32, buf, 0x7f80, seg.base);
8062 put_smstate(u32, buf, 0x7f7c, seg.limit);
8063 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
8064
8065 kvm_x86_ops.get_gdt(vcpu, &dt);
8066 put_smstate(u32, buf, 0x7f74, dt.address);
8067 put_smstate(u32, buf, 0x7f70, dt.size);
8068
8069 kvm_x86_ops.get_idt(vcpu, &dt);
8070 put_smstate(u32, buf, 0x7f58, dt.address);
8071 put_smstate(u32, buf, 0x7f54, dt.size);
8072
8073 for (i = 0; i < 6; i++)
8074 enter_smm_save_seg_32(vcpu, buf, i);
8075
8076 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
8077
8078
8079 put_smstate(u32, buf, 0x7efc, 0x00020000);
8080 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
8081}
8082
8083#ifdef CONFIG_X86_64
8084static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
8085{
8086 struct desc_ptr dt;
8087 struct kvm_segment seg;
8088 unsigned long val;
8089 int i;
8090
8091 for (i = 0; i < 16; i++)
8092 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
8093
8094 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
8095 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
8096
8097 kvm_get_dr(vcpu, 6, &val);
8098 put_smstate(u64, buf, 0x7f68, val);
8099 kvm_get_dr(vcpu, 7, &val);
8100 put_smstate(u64, buf, 0x7f60, val);
8101
8102 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
8103 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
8104 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
8105
8106 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
8107
8108
8109 put_smstate(u32, buf, 0x7efc, 0x00020064);
8110
8111 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
8112
8113 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
8114 put_smstate(u16, buf, 0x7e90, seg.selector);
8115 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
8116 put_smstate(u32, buf, 0x7e94, seg.limit);
8117 put_smstate(u64, buf, 0x7e98, seg.base);
8118
8119 kvm_x86_ops.get_idt(vcpu, &dt);
8120 put_smstate(u32, buf, 0x7e84, dt.size);
8121 put_smstate(u64, buf, 0x7e88, dt.address);
8122
8123 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
8124 put_smstate(u16, buf, 0x7e70, seg.selector);
8125 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
8126 put_smstate(u32, buf, 0x7e74, seg.limit);
8127 put_smstate(u64, buf, 0x7e78, seg.base);
8128
8129 kvm_x86_ops.get_gdt(vcpu, &dt);
8130 put_smstate(u32, buf, 0x7e64, dt.size);
8131 put_smstate(u64, buf, 0x7e68, dt.address);
8132
8133 for (i = 0; i < 6; i++)
8134 enter_smm_save_seg_64(vcpu, buf, i);
8135}
8136#endif
8137
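/*
 * Emulate SMM entry: save the current register state into the SMRAM
 * state-save area and switch the vCPU to the SMM execution environment
 * (CS at SMBASE, real-mode-like flat segments, paging disabled).
 */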
8138static void enter_smm(struct kvm_vcpu *vcpu)
8139{
8140 struct kvm_segment cs, ds;
8141 struct desc_ptr dt;
8142 char buf[512];
8143 u32 cr0;
8144
8145 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
8146 memset(buf, 0, 512);
8147#ifdef CONFIG_X86_64
8148 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
8149 enter_smm_save_state_64(vcpu, buf);
8150 else
8151#endif
8152 enter_smm_save_state_32(vcpu, buf);
8153
 /*
  * Give pre_enter_smm() a chance to make ISA-specific changes to the vCPU
  * state (e.g. leave guest mode) after the state has been saved to the
  * SMRAM buffer but before it is written to guest memory.
  */
8159 kvm_x86_ops.pre_enter_smm(vcpu, buf);
8160
8161 vcpu->arch.hflags |= HF_SMM_MASK;
8162 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
8163
8164 if (kvm_x86_ops.get_nmi_mask(vcpu))
8165 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
8166 else
8167 kvm_x86_ops.set_nmi_mask(vcpu, true);
8168
8169 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
8170 kvm_rip_write(vcpu, 0x8000);
8171
8172 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
8173 kvm_x86_ops.set_cr0(vcpu, cr0);
8174 vcpu->arch.cr0 = cr0;
8175
8176 kvm_x86_ops.set_cr4(vcpu, 0);
8177
8178
8179 dt.address = dt.size = 0;
8180 kvm_x86_ops.set_idt(vcpu, &dt);
8181
8182 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
8183
8184 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
8185 cs.base = vcpu->arch.smbase;
8186
8187 ds.selector = 0;
8188 ds.base = 0;
8189
8190 cs.limit = ds.limit = 0xffffffff;
8191 cs.type = ds.type = 0x3;
8192 cs.dpl = ds.dpl = 0;
8193 cs.db = ds.db = 0;
8194 cs.s = ds.s = 1;
8195 cs.l = ds.l = 0;
8196 cs.g = ds.g = 1;
8197 cs.avl = ds.avl = 0;
8198 cs.present = ds.present = 1;
8199 cs.unusable = ds.unusable = 0;
8200 cs.padding = ds.padding = 0;
8201
8202 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
8203 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
8204 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
8205 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
8206 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
8207 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
8208
8209#ifdef CONFIG_X86_64
8210 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
8211 kvm_x86_ops.set_efer(vcpu, 0);
8212#endif
8213
8214 kvm_update_cpuid_runtime(vcpu);
8215 kvm_mmu_reset_context(vcpu);
8216}
8217
8218static void process_smi(struct kvm_vcpu *vcpu)
8219{
8220 vcpu->arch.smi_pending = true;
8221 kvm_make_request(KVM_REQ_EVENT, vcpu);
8222}
8223
8224void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
8225 unsigned long *vcpu_bitmap)
8226{
8227 cpumask_var_t cpus;
8228
8229 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
8230
8231 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
8232 NULL, vcpu_bitmap, cpus);
8233
8234 free_cpumask_var(cpus);
8235}
8236
8237void kvm_make_scan_ioapic_request(struct kvm *kvm)
8238{
8239 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
8240}
8241
8242void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
8243{
8244 if (!lapic_in_kernel(vcpu))
8245 return;
8246
8247 vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
8248 kvm_apic_update_apicv(vcpu);
8249 kvm_x86_ops.refresh_apicv_exec_ctrl(vcpu);
8250}
8251EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
8252
/*
 * NOTE: do not hold any lock prior to calling this.
 *
 * In particular, kvm_request_apicv_update() expects kvm->srcu not to be
 * held, because it calls __x86_set_memory_region() which does
 * synchronize_srcu(&kvm->srcu).
 */
8260void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
8261{
8262 struct kvm_vcpu *except;
8263 unsigned long old, new, expected;
8264
8265 if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
8266 !kvm_x86_ops.check_apicv_inhibit_reasons(bit))
8267 return;
8268
8269 old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
8270 do {
8271 expected = new = old;
8272 if (activate)
8273 __clear_bit(bit, &new);
8274 else
8275 __set_bit(bit, &new);
8276 if (new == old)
8277 break;
8278 old = cmpxchg(&kvm->arch.apicv_inhibit_reasons, expected, new);
8279 } while (old != expected);
8280
8281 if (!!old == !!new)
8282 return;
8283
8284 trace_kvm_apicv_update_request(activate, bit);
8285 if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
8286 kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
8287
 /*
  * Update the vCPU running this code directly and send the request only to
  * every other vCPU; a vCPU cannot wait on a request targeted at itself.
  */
8293 except = kvm_get_running_vcpu();
8294 kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE,
8295 except);
8296 if (except)
8297 kvm_vcpu_update_apicv(except);
8298}
8299EXPORT_SYMBOL_GPL(kvm_request_apicv_update);
8300
8301static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
8302{
8303 if (!kvm_apic_present(vcpu))
8304 return;
8305
8306 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
8307
8308 if (irqchip_split(vcpu->kvm))
8309 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
8310 else {
8311 if (vcpu->arch.apicv_active)
8312 kvm_x86_ops.sync_pir_to_irr(vcpu);
8313 if (ioapic_in_kernel(vcpu->kvm))
8314 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
8315 }
8316
8317 if (is_guest_mode(vcpu))
8318 vcpu->arch.load_eoi_exitmap_pending = true;
8319 else
8320 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
8321}
8322
8323static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
8324{
8325 u64 eoi_exit_bitmap[4];
8326
8327 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
8328 return;
8329
8330 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
8331 vcpu_to_synic(vcpu)->vec_bitmap, 256);
8332 kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap);
8333}
8334
8335void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
8336 unsigned long start, unsigned long end)
8337{
8338 unsigned long apic_address;
8339
 /*
  * The physical address of the APIC access page is stored in the VMCS, so
  * request a reload on every vCPU when that page is invalidated.
  */
8344 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
8345 if (start <= apic_address && apic_address < end)
8346 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
8347}
8348
8349void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
8350{
8351 if (!lapic_in_kernel(vcpu))
8352 return;
8353
8354 if (!kvm_x86_ops.set_apic_access_page_addr)
8355 return;
8356
8357 kvm_x86_ops.set_apic_access_page_addr(vcpu);
8358}
8359
8360void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
8361{
8362 smp_send_reschedule(vcpu->cpu);
8363}
8364EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
8365
/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to userspace.  Otherwise, the value will be returned to userspace.
 */
8371static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
8372{
8373 int r;
8374 bool req_int_win =
8375 dm_request_for_irq_injection(vcpu) &&
8376 kvm_cpu_accept_dm_intr(vcpu);
8377 fastpath_t exit_fastpath;
8378
8379 bool req_immediate_exit = false;
8380
8381 if (kvm_request_pending(vcpu)) {
8382 if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
8383 if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) {
8384 r = 0;
8385 goto out;
8386 }
8387 }
8388 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
8389 kvm_mmu_unload(vcpu);
8390 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
8391 __kvm_migrate_timers(vcpu);
8392 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
8393 kvm_gen_update_masterclock(vcpu->kvm);
8394 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
8395 kvm_gen_kvmclock_update(vcpu);
8396 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
8397 r = kvm_guest_time_update(vcpu);
8398 if (unlikely(r))
8399 goto out;
8400 }
8401 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
8402 kvm_mmu_sync_roots(vcpu);
8403 if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
8404 kvm_mmu_load_pgd(vcpu);
8405 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
8406 kvm_vcpu_flush_tlb_all(vcpu);
8407
8408
8409 kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
8410 }
8411 if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
8412 kvm_vcpu_flush_tlb_current(vcpu);
8413 if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
8414 kvm_vcpu_flush_tlb_guest(vcpu);
8415
8416 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
8417 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
8418 r = 0;
8419 goto out;
8420 }
8421 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
8422 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
8423 vcpu->mmio_needed = 0;
8424 r = 0;
8425 goto out;
8426 }
8427 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
8428
8429 vcpu->arch.apf.halted = true;
8430 r = 1;
8431 goto out;
8432 }
8433 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
8434 record_steal_time(vcpu);
8435 if (kvm_check_request(KVM_REQ_SMI, vcpu))
8436 process_smi(vcpu);
8437 if (kvm_check_request(KVM_REQ_NMI, vcpu))
8438 process_nmi(vcpu);
8439 if (kvm_check_request(KVM_REQ_PMU, vcpu))
8440 kvm_pmu_handle_event(vcpu);
8441 if (kvm_check_request(KVM_REQ_PMI, vcpu))
8442 kvm_pmu_deliver_pmi(vcpu);
8443 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
8444 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
8445 if (test_bit(vcpu->arch.pending_ioapic_eoi,
8446 vcpu->arch.ioapic_handled_vectors)) {
8447 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
8448 vcpu->run->eoi.vector =
8449 vcpu->arch.pending_ioapic_eoi;
8450 r = 0;
8451 goto out;
8452 }
8453 }
8454 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
8455 vcpu_scan_ioapic(vcpu);
8456 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
8457 vcpu_load_eoi_exitmap(vcpu);
8458 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
8459 kvm_vcpu_reload_apic_access_page(vcpu);
8460 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
8461 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8462 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
8463 r = 0;
8464 goto out;
8465 }
8466 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
8467 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8468 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
8469 r = 0;
8470 goto out;
8471 }
8472 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
8473 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
8474 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
8475 r = 0;
8476 goto out;
8477 }
8478
 /*
  * KVM_REQ_HV_STIMER has to be processed after KVM_REQ_CLOCK_UPDATE,
  * because Hyper-V SynIC timers depend on the guest clock being
  * up-to-date.
  */
8484 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
8485 kvm_hv_process_stimers(vcpu);
8486 if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
8487 kvm_vcpu_update_apicv(vcpu);
8488 if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
8489 kvm_check_async_pf_completion(vcpu);
8490 }
8491
8492 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
8493 ++vcpu->stat.req_event;
8494 kvm_apic_accept_events(vcpu);
8495 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
8496 r = 1;
8497 goto out;
8498 }
8499
8500 inject_pending_event(vcpu, &req_immediate_exit);
8501 if (req_int_win)
8502 kvm_x86_ops.enable_irq_window(vcpu);
8503
8504 if (kvm_lapic_enabled(vcpu)) {
8505 update_cr8_intercept(vcpu);
8506 kvm_lapic_sync_to_vapic(vcpu);
8507 }
8508 }
8509
8510 r = kvm_mmu_reload(vcpu);
8511 if (unlikely(r)) {
8512 goto cancel_injection;
8513 }
8514
8515 preempt_disable();
8516
8517 kvm_x86_ops.prepare_guest_switch(vcpu);
8518
 /*
  * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt IPIs are
  * then delayed until after guest entry, which ensures that they result in
  * virtual interrupt delivery.
  */
8524 local_irq_disable();
8525 vcpu->mode = IN_GUEST_MODE;
8526
8527 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8528
 /*
  * smp_mb__after_srcu_read_unlock() below turns the SRCU unlock above into
  * a full barrier: the store of IN_GUEST_MODE to vcpu->mode must be
  * ordered before the subsequent checks of vcpu->requests and of posted
  * interrupt state, and before any page-table reads done while the vCPU
  * runs, so that remote TLB flushes and posted-interrupt senders observe
  * the mode change.
  */
8541 smp_mb__after_srcu_read_unlock();
8542
 /*
  * Handle the case where a posted interrupt was notified with
  * kvm_vcpu_kick() while the vCPU was outside guest mode: sync the PIR
  * into the vIRR one last time before entering the guest.
  */
8547 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
8548 kvm_x86_ops.sync_pir_to_irr(vcpu);
8549
8550 if (kvm_vcpu_exit_request(vcpu)) {
8551 vcpu->mode = OUTSIDE_GUEST_MODE;
8552 smp_wmb();
8553 local_irq_enable();
8554 preempt_enable();
8555 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8556 r = 1;
8557 goto cancel_injection;
8558 }
8559
8560 if (req_immediate_exit) {
8561 kvm_make_request(KVM_REQ_EVENT, vcpu);
8562 kvm_x86_ops.request_immediate_exit(vcpu);
8563 }
8564
8565 trace_kvm_entry(vcpu->vcpu_id);
8566
8567 fpregs_assert_state_consistent();
8568 if (test_thread_flag(TIF_NEED_FPU_LOAD))
8569 switch_fpu_return();
8570
8571 if (unlikely(vcpu->arch.switch_db_regs)) {
8572 set_debugreg(0, 7);
8573 set_debugreg(vcpu->arch.eff_db[0], 0);
8574 set_debugreg(vcpu->arch.eff_db[1], 1);
8575 set_debugreg(vcpu->arch.eff_db[2], 2);
8576 set_debugreg(vcpu->arch.eff_db[3], 3);
8577 set_debugreg(vcpu->arch.dr6, 6);
8578 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8579 }
8580
8581 exit_fastpath = kvm_x86_ops.run(vcpu);
8582
 /*
  * If the guest was given direct access to the debug registers
  * (KVM_DEBUGREG_WONT_EXIT), read the current hardware values back so
  * KVM's shadow copies are up to date before the host can clobber them.
  */
8589 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
8590 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
8591 kvm_x86_ops.sync_dirty_debug_regs(vcpu);
8592 kvm_update_dr0123(vcpu);
8593 kvm_update_dr7(vcpu);
8594 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8595 }
8596
 /*
  * Running the guest may have clobbered the host's debug registers (at
  * least dr7).  If the host has active hardware breakpoints, restore them;
  * otherwise the stale values do not matter.
  */
8604 if (hw_breakpoint_active())
8605 hw_breakpoint_restore();
8606
8607 vcpu->arch.last_vmentry_cpu = vcpu->cpu;
8608 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
8609
8610 vcpu->mode = OUTSIDE_GUEST_MODE;
8611 smp_wmb();
8612
8613 kvm_x86_ops.handle_exit_irqoff(vcpu);
8614
 /*
  * Briefly enable IRQs to consume any pending interrupts, including the
  * one that may have caused this VM-Exit.  The stat.exits increment
  * doubles as the instruction needed after local_irq_enable() to fully
  * open the interrupt window on CPUs with an STI-like shadow.
  */
8622 kvm_before_interrupt(vcpu);
8623 local_irq_enable();
8624 ++vcpu->stat.exits;
8625 local_irq_disable();
8626 kvm_after_interrupt(vcpu);
8627
8628 if (lapic_in_kernel(vcpu)) {
8629 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
8630 if (delta != S64_MIN) {
8631 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
8632 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
8633 }
8634 }
8635
8636 local_irq_enable();
8637 preempt_enable();
8638
8639 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8640
8641
8642
8643
8644 if (unlikely(prof_on == KVM_PROFILING)) {
8645 unsigned long rip = kvm_rip_read(vcpu);
8646 profile_hit(KVM_PROFILING, (void *)rip);
8647 }
8648
8649 if (unlikely(vcpu->arch.tsc_always_catchup))
8650 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8651
8652 if (vcpu->arch.apic_attention)
8653 kvm_lapic_sync_from_vapic(vcpu);
8654
8655 r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath);
8656 return r;
8657
8658cancel_injection:
8659 if (req_immediate_exit)
8660 kvm_make_request(KVM_REQ_EVENT, vcpu);
8661 kvm_x86_ops.cancel_injection(vcpu);
8662 if (unlikely(vcpu->arch.apic_attention))
8663 kvm_lapic_sync_from_vapic(vcpu);
8664out:
8665 return r;
8666}
8667
8668static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
8669{
8670 if (!kvm_arch_vcpu_runnable(vcpu) &&
8671 (!kvm_x86_ops.pre_block || kvm_x86_ops.pre_block(vcpu) == 0)) {
8672 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8673 kvm_vcpu_block(vcpu);
8674 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8675
8676 if (kvm_x86_ops.post_block)
8677 kvm_x86_ops.post_block(vcpu);
8678
8679 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
8680 return 1;
8681 }
8682
8683 kvm_apic_accept_events(vcpu);
8684 switch(vcpu->arch.mp_state) {
8685 case KVM_MP_STATE_HALTED:
8686 vcpu->arch.pv.pv_unhalted = false;
8687 vcpu->arch.mp_state =
8688 KVM_MP_STATE_RUNNABLE;
8689 fallthrough;
8690 case KVM_MP_STATE_RUNNABLE:
8691 vcpu->arch.apf.halted = false;
8692 break;
8693 case KVM_MP_STATE_INIT_RECEIVED:
8694 break;
8695 default:
8696 return -EINTR;
8697 }
8698 return 1;
8699}
8700
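/*
 * A vCPU can enter the guest only while RUNNABLE and not halted waiting on
 * an async page fault; nested events are processed first since a nested
 * VM-Exit may change that state.
 */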
8701static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
8702{
8703 if (is_guest_mode(vcpu))
8704 kvm_x86_ops.nested_ops->check_events(vcpu);
8705
8706 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
8707 !vcpu->arch.apf.halted);
8708}
8709
8710static int vcpu_run(struct kvm_vcpu *vcpu)
8711{
8712 int r;
8713 struct kvm *kvm = vcpu->kvm;
8714
8715 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8716 vcpu->arch.l1tf_flush_l1d = true;
8717
8718 for (;;) {
8719 if (kvm_vcpu_running(vcpu)) {
8720 r = vcpu_enter_guest(vcpu);
8721 } else {
8722 r = vcpu_block(kvm, vcpu);
8723 }
8724
8725 if (r <= 0)
8726 break;
8727
8728 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
8729 if (kvm_cpu_has_pending_timer(vcpu))
8730 kvm_inject_pending_timer_irqs(vcpu);
8731
8732 if (dm_request_for_irq_injection(vcpu) &&
8733 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
8734 r = 0;
8735 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
8736 ++vcpu->stat.request_irq_exits;
8737 break;
8738 }
8739
8740 if (__xfer_to_guest_mode_work_pending()) {
8741 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8742 r = xfer_to_guest_mode_handle_work(vcpu);
8743 if (r)
8744 return r;
8745 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8746 }
8747 }
8748
8749 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8750
8751 return r;
8752}
8753
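/*
 * Re-enter the emulator to finish an instruction whose MMIO or PIO round
 * trip to userspace has completed.
 */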
8754static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
8755{
8756 int r;
8757
8758 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8759 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
8760 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8761 return r;
8762}
8763
8764static int complete_emulated_pio(struct kvm_vcpu *vcpu)
8765{
8766 BUG_ON(!vcpu->arch.pio.count);
8767
8768 return complete_emulated_io(vcpu);
8769}
8770
/*
 * Implements the following, as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit
 */
8789static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
8790{
8791 struct kvm_run *run = vcpu->run;
8792 struct kvm_mmio_fragment *frag;
8793 unsigned len;
8794
8795 BUG_ON(!vcpu->mmio_needed);
8796
8797
8798 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
8799 len = min(8u, frag->len);
8800 if (!vcpu->mmio_is_write)
8801 memcpy(frag->data, run->mmio.data, len);
8802
8803 if (frag->len <= 8) {
8804
8805 frag++;
8806 vcpu->mmio_cur_fragment++;
8807 } else {
8808
8809 frag->data += len;
8810 frag->gpa += len;
8811 frag->len -= len;
8812 }
8813
8814 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
8815 vcpu->mmio_needed = 0;
8816
8817
8818 if (vcpu->mmio_is_write)
8819 return 1;
8820 vcpu->mmio_read_completed = 1;
8821 return complete_emulated_io(vcpu);
8822 }
8823
8824 run->exit_reason = KVM_EXIT_MMIO;
8825 run->mmio.phys_addr = frag->gpa;
8826 if (vcpu->mmio_is_write)
8827 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
8828 run->mmio.len = min(8u, frag->len);
8829 run->mmio.is_write = vcpu->mmio_is_write;
8830 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8831 return 0;
8832}
8833
8834static void kvm_save_current_fpu(struct fpu *fpu)
8835{
8836 /*
8837  * If the target FPU state is not resident in the CPU registers, just
8838  * memcpy() from current, else save CPU state directly to the target.
8839  */
8840 if (test_thread_flag(TIF_NEED_FPU_LOAD))
8841 memcpy(&fpu->state, &current->thread.fpu.state,
8842 fpu_kernel_xstate_size);
8843 else
8844 copy_fpregs_to_fpstate(fpu);
8845}
8846
8847/* Swap the userspace FPU context for the guest FPU context. */
8848static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
8849{
8850 fpregs_lock();
8851
8852 kvm_save_current_fpu(vcpu->arch.user_fpu);
8853
8854 /* PKRU is separately restored in kvm_x86_ops.run(). */
8855 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
8856 ~XFEATURE_MASK_PKRU);
8857
8858 fpregs_mark_activate();
8859 fpregs_unlock();
8860
8861 trace_kvm_fpu(1);
8862}
8863
8864/* When vcpu_run ends, restore the userspace FPU context. */
8865static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
8866{
8867 fpregs_lock();
8868
8869 kvm_save_current_fpu(vcpu->arch.guest_fpu);
8870
8871 copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
8872
8873 fpregs_mark_activate();
8874 fpregs_unlock();
8875
8876 ++vcpu->stat.fpu_reload;
8877 trace_kvm_fpu(0);
8878}
8879
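/*
 * kvm_arch_vcpu_ioctl_run() backs the KVM_RUN ioctl.  For orientation, a
 * minimal userspace loop might look roughly like the sketch below (purely
 * illustrative, not part of this file; handle_pio()/handle_mmio() are
 * hypothetical helpers and the mmap size comes from KVM_GET_VCPU_MMAP_SIZE):
 *
 *	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		switch (run->exit_reason) {
 *		case KVM_EXIT_IO:
 *			handle_pio(run);	// emulate the port access
 *			break;
 *		case KVM_EXIT_MMIO:
 *			handle_mmio(run);	// emulate the memory access
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */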
8880int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
8881{
8882 struct kvm_run *kvm_run = vcpu->run;
8883 int r;
8884
8885 vcpu_load(vcpu);
8886 kvm_sigset_activate(vcpu);
8887 kvm_load_guest_fpu(vcpu);
8888
8889 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
8890 if (kvm_run->immediate_exit) {
8891 r = -EINTR;
8892 goto out;
8893 }
8894 kvm_vcpu_block(vcpu);
8895 kvm_apic_accept_events(vcpu);
8896 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
8897 r = -EAGAIN;
8898 if (signal_pending(current)) {
8899 r = -EINTR;
8900 kvm_run->exit_reason = KVM_EXIT_INTR;
8901 ++vcpu->stat.signal_exits;
8902 }
8903 goto out;
8904 }
8905
8906 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
8907 r = -EINVAL;
8908 goto out;
8909 }
8910
8911 if (kvm_run->kvm_dirty_regs) {
8912 r = sync_regs(vcpu);
8913 if (r != 0)
8914 goto out;
8915 }
8916
8917 /* Re-sync the APIC TPR from userspace's cr8 when the LAPIC is in userspace. */
8918 if (!lapic_in_kernel(vcpu)) {
8919 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
8920 r = -EINVAL;
8921 goto out;
8922 }
8923 }
8924
8925 if (unlikely(vcpu->arch.complete_userspace_io)) {
8926 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
8927 vcpu->arch.complete_userspace_io = NULL;
8928 r = cui(vcpu);
8929 if (r <= 0)
8930 goto out;
8931 } else
8932 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
8933
8934 if (kvm_run->immediate_exit)
8935 r = -EINTR;
8936 else
8937 r = vcpu_run(vcpu);
8938
8939out:
8940 kvm_put_guest_fpu(vcpu);
8941 if (kvm_run->kvm_valid_regs)
8942 store_regs(vcpu);
8943 post_kvm_run_save(vcpu);
8944 kvm_sigset_deactivate(vcpu);
8945
8946 vcpu_put(vcpu);
8947 return r;
8948}
8949
8950static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8951{
8952 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
8953 /*
8954  * We are here if userspace calls get_regs() in the middle of
8955  * instruction emulation.  Register state needs to be copied back
8956  * from the emulation context to the vcpu.  Userspace shouldn't
8957  * normally do that, but some badly designed PV devices (the
8958  * vmware backdoor interface) need this to work.
8959  */
8960 emulator_writeback_register_cache(vcpu->arch.emulate_ctxt);
8961 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8962 }
8963 regs->rax = kvm_rax_read(vcpu);
8964 regs->rbx = kvm_rbx_read(vcpu);
8965 regs->rcx = kvm_rcx_read(vcpu);
8966 regs->rdx = kvm_rdx_read(vcpu);
8967 regs->rsi = kvm_rsi_read(vcpu);
8968 regs->rdi = kvm_rdi_read(vcpu);
8969 regs->rsp = kvm_rsp_read(vcpu);
8970 regs->rbp = kvm_rbp_read(vcpu);
8971#ifdef CONFIG_X86_64
8972 regs->r8 = kvm_r8_read(vcpu);
8973 regs->r9 = kvm_r9_read(vcpu);
8974 regs->r10 = kvm_r10_read(vcpu);
8975 regs->r11 = kvm_r11_read(vcpu);
8976 regs->r12 = kvm_r12_read(vcpu);
8977 regs->r13 = kvm_r13_read(vcpu);
8978 regs->r14 = kvm_r14_read(vcpu);
8979 regs->r15 = kvm_r15_read(vcpu);
8980#endif
8981
8982 regs->rip = kvm_rip_read(vcpu);
8983 regs->rflags = kvm_get_rflags(vcpu);
8984}
8985
8986int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8987{
8988 vcpu_load(vcpu);
8989 __get_regs(vcpu, regs);
8990 vcpu_put(vcpu);
8991 return 0;
8992}
8993
8994static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8995{
8996 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
8997 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8998
8999 kvm_rax_write(vcpu, regs->rax);
9000 kvm_rbx_write(vcpu, regs->rbx);
9001 kvm_rcx_write(vcpu, regs->rcx);
9002 kvm_rdx_write(vcpu, regs->rdx);
9003 kvm_rsi_write(vcpu, regs->rsi);
9004 kvm_rdi_write(vcpu, regs->rdi);
9005 kvm_rsp_write(vcpu, regs->rsp);
9006 kvm_rbp_write(vcpu, regs->rbp);
9007#ifdef CONFIG_X86_64
9008 kvm_r8_write(vcpu, regs->r8);
9009 kvm_r9_write(vcpu, regs->r9);
9010 kvm_r10_write(vcpu, regs->r10);
9011 kvm_r11_write(vcpu, regs->r11);
9012 kvm_r12_write(vcpu, regs->r12);
9013 kvm_r13_write(vcpu, regs->r13);
9014 kvm_r14_write(vcpu, regs->r14);
9015 kvm_r15_write(vcpu, regs->r15);
9016#endif
9017
9018 kvm_rip_write(vcpu, regs->rip);
9019 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
9020
9021 vcpu->arch.exception.pending = false;
9022
9023 kvm_make_request(KVM_REQ_EVENT, vcpu);
9024}
9025
9026int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
9027{
9028 vcpu_load(vcpu);
9029 __set_regs(vcpu, regs);
9030 vcpu_put(vcpu);
9031 return 0;
9032}
9033
9034void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
9035{
9036 struct kvm_segment cs;
9037
9038 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
9039 *db = cs.db;
9040 *l = cs.l;
9041}
9042EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
9043
9044static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
9045{
9046 struct desc_ptr dt;
9047
9048 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
9049 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
9050 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
9051 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
9052 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
9053 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
9054
9055 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
9056 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
9057
9058 kvm_x86_ops.get_idt(vcpu, &dt);
9059 sregs->idt.limit = dt.size;
9060 sregs->idt.base = dt.address;
9061 kvm_x86_ops.get_gdt(vcpu, &dt);
9062 sregs->gdt.limit = dt.size;
9063 sregs->gdt.base = dt.address;
9064
9065 sregs->cr0 = kvm_read_cr0(vcpu);
9066 sregs->cr2 = vcpu->arch.cr2;
9067 sregs->cr3 = kvm_read_cr3(vcpu);
9068 sregs->cr4 = kvm_read_cr4(vcpu);
9069 sregs->cr8 = kvm_get_cr8(vcpu);
9070 sregs->efer = vcpu->arch.efer;
9071 sregs->apic_base = kvm_get_apic_base(vcpu);
9072
9073 memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
9074
9075 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
9076 set_bit(vcpu->arch.interrupt.nr,
9077 (unsigned long *)sregs->interrupt_bitmap);
9078}
9079
9080int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
9081 struct kvm_sregs *sregs)
9082{
9083 vcpu_load(vcpu);
9084 __get_sregs(vcpu, sregs);
9085 vcpu_put(vcpu);
9086 return 0;
9087}
9088
9089int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
9090 struct kvm_mp_state *mp_state)
9091{
9092 vcpu_load(vcpu);
9093 if (kvm_mpx_supported())
9094 kvm_load_guest_fpu(vcpu);
9095
9096 kvm_apic_accept_events(vcpu);
9097 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
9098 vcpu->arch.pv.pv_unhalted)
9099 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
9100 else
9101 mp_state->mp_state = vcpu->arch.mp_state;
9102
9103 if (kvm_mpx_supported())
9104 kvm_put_guest_fpu(vcpu);
9105 vcpu_put(vcpu);
9106 return 0;
9107}
9108
9109int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
9110 struct kvm_mp_state *mp_state)
9111{
9112 int ret = -EINVAL;
9113
9114 vcpu_load(vcpu);
9115
9116 if (!lapic_in_kernel(vcpu) &&
9117 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
9118 goto out;
9119
9120 /*
9121  * KVM_MP_STATE_INIT_RECEIVED means the processor is in INIT state;
9122  * a latched INIT should be reported via KVM_SET_VCPU_EVENTS instead,
9123  * so reject it here while INIT is latched or an SMI is pending.
9124  */
9125 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
9126 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
9127 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
9128 goto out;
9129
9130 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
9131 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
9132 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
9133 } else
9134 vcpu->arch.mp_state = mp_state->mp_state;
9135 kvm_make_request(KVM_REQ_EVENT, vcpu);
9136
9137 ret = 0;
9138out:
9139 vcpu_put(vcpu);
9140 return ret;
9141}
9142
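/*
 * Common entry point for emulating a hardware task switch (e.g. via a task
 * gate or IRET with NT set), called from the vendor exit handlers.  An
 * emulation failure is reported to userspace as an internal error rather
 * than being injected into the guest.
 */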
9143int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
9144 int reason, bool has_error_code, u32 error_code)
9145{
9146 struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
9147 int ret;
9148
9149 init_emulate_ctxt(vcpu);
9150
9151 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
9152 has_error_code, error_code);
9153 if (ret) {
9154 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
9155 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
9156 vcpu->run->internal.ndata = 0;
9157 return 0;
9158 }
9159
9160 kvm_rip_write(vcpu, ctxt->eip);
9161 kvm_set_rflags(vcpu, ctxt->eflags);
9162 return 1;
9163}
9164EXPORT_SYMBOL_GPL(kvm_task_switch);
9165
9166static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
9167{
9168 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
9169 /*
9170  * When EFER.LME and CR0.PG are set, the processor is in
9171  * 64-bit mode (though maybe in a 32-bit code segment).
9172  * CR4.PAE and EFER.LMA must be set.
9173  */
9174 if (!(sregs->cr4 & X86_CR4_PAE)
9175 || !(sregs->efer & EFER_LMA))
9176 return -EINVAL;
9177 } else {
9178 /*
9179  * Not in 64-bit mode: EFER.LMA is clear and the code
9180  * segment cannot be 64-bit.
9181  */
9182 if (sregs->efer & EFER_LMA || sregs->cs.l)
9183 return -EINVAL;
9184 }
9185
9186 return kvm_valid_cr4(vcpu, sregs->cr4);
9187}
9188
9189static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
9190{
9191 struct msr_data apic_base_msr;
9192 int mmu_reset_needed = 0;
9193 int cpuid_update_needed = 0;
9194 int pending_vec, max_bits, idx;
9195 struct desc_ptr dt;
9196 int ret = -EINVAL;
9197
9198 if (kvm_valid_sregs(vcpu, sregs))
9199 goto out;
9200
9201 apic_base_msr.data = sregs->apic_base;
9202 apic_base_msr.host_initiated = true;
9203 if (kvm_set_apic_base(vcpu, &apic_base_msr))
9204 goto out;
9205
9206 dt.size = sregs->idt.limit;
9207 dt.address = sregs->idt.base;
9208 kvm_x86_ops.set_idt(vcpu, &dt);
9209 dt.size = sregs->gdt.limit;
9210 dt.address = sregs->gdt.base;
9211 kvm_x86_ops.set_gdt(vcpu, &dt);
9212
9213 vcpu->arch.cr2 = sregs->cr2;
9214 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
9215 vcpu->arch.cr3 = sregs->cr3;
9216 kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
9217
9218 kvm_set_cr8(vcpu, sregs->cr8);
9219
9220 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
9221 kvm_x86_ops.set_efer(vcpu, sregs->efer);
9222
9223 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
9224 kvm_x86_ops.set_cr0(vcpu, sregs->cr0);
9225 vcpu->arch.cr0 = sregs->cr0;
9226
9227 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
9228 cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
9229 (X86_CR4_OSXSAVE | X86_CR4_PKE));
9230 kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
9231 if (cpuid_update_needed)
9232 kvm_update_cpuid_runtime(vcpu);
9233
9234 idx = srcu_read_lock(&vcpu->kvm->srcu);
9235 if (is_pae_paging(vcpu)) {
9236 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
9237 mmu_reset_needed = 1;
9238 }
9239 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9240
9241 if (mmu_reset_needed)
9242 kvm_mmu_reset_context(vcpu);
9243
9244 max_bits = KVM_NR_INTERRUPTS;
9245 pending_vec = find_first_bit(
9246 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
9247 if (pending_vec < max_bits) {
9248 kvm_queue_interrupt(vcpu, pending_vec, false);
9249 pr_debug("Set back pending irq %d\n", pending_vec);
9250 }
9251
9252 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
9253 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
9254 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
9255 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
9256 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
9257 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
9258
9259 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
9260 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
9261
9262 update_cr8_intercept(vcpu);
9263
9264 /* Older userspace won't unhalt the vcpu on reset. */
9265 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
9266 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
9267 !is_protmode(vcpu))
9268 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
9269
9270 kvm_make_request(KVM_REQ_EVENT, vcpu);
9271
9272 ret = 0;
9273out:
9274 return ret;
9275}
9276
9277int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
9278 struct kvm_sregs *sregs)
9279{
9280 int ret;
9281
9282 vcpu_load(vcpu);
9283 ret = __set_sregs(vcpu, sregs);
9284 vcpu_put(vcpu);
9285 return ret;
9286}
9287
9288int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
9289 struct kvm_guest_debug *dbg)
9290{
9291 unsigned long rflags;
9292 int i, r;
9293
9294 vcpu_load(vcpu);
9295
9296 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
9297 r = -EBUSY;
9298 if (vcpu->arch.exception.pending)
9299 goto out;
9300 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
9301 kvm_queue_exception(vcpu, DB_VECTOR);
9302 else
9303 kvm_queue_exception(vcpu, BP_VECTOR);
9304 }
9305
9306 /*
9307  * Read rflags as long as potentially injected trace flags are
9308  * still filtered out.
9309  */
9310 rflags = kvm_get_rflags(vcpu);
9311
9312 vcpu->guest_debug = dbg->control;
9313 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
9314 vcpu->guest_debug = 0;
9315
9316 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
9317 for (i = 0; i < KVM_NR_DB_REGS; ++i)
9318 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
9319 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
9320 } else {
9321 for (i = 0; i < KVM_NR_DB_REGS; i++)
9322 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
9323 }
9324 kvm_update_dr7(vcpu);
9325
9326 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
9327 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
9328 get_segment_base(vcpu, VCPU_SREG_CS);
9329
9330 /*
9331  * Trigger an rflags update that will inject or remove the trace
9332  * flags.
9333  */
9334 kvm_set_rflags(vcpu, rflags);
9335
9336 kvm_x86_ops.update_exception_bitmap(vcpu);
9337
9338 r = 0;
9339
9340out:
9341 vcpu_put(vcpu);
9342 return r;
9343}
9344
9345/*
9346 * Translate a guest virtual address to a guest physical address.
9347 */
9348int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
9349 struct kvm_translation *tr)
9350{
9351 unsigned long vaddr = tr->linear_address;
9352 gpa_t gpa;
9353 int idx;
9354
9355 vcpu_load(vcpu);
9356
9357 idx = srcu_read_lock(&vcpu->kvm->srcu);
9358 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
9359 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9360 tr->physical_address = gpa;
9361 tr->valid = gpa != UNMAPPED_GVA;
9362 tr->writeable = 1;
9363 tr->usermode = 0;
9364
9365 vcpu_put(vcpu);
9366 return 0;
9367}
9368
9369int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9370{
9371 struct fxregs_state *fxsave;
9372
9373 vcpu_load(vcpu);
9374
9375 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
9376 memcpy(fpu->fpr, fxsave->st_space, 128);
9377 fpu->fcw = fxsave->cwd;
9378 fpu->fsw = fxsave->swd;
9379 fpu->ftwx = fxsave->twd;
9380 fpu->last_opcode = fxsave->fop;
9381 fpu->last_ip = fxsave->rip;
9382 fpu->last_dp = fxsave->rdp;
9383 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
9384
9385 vcpu_put(vcpu);
9386 return 0;
9387}
9388
9389int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9390{
9391 struct fxregs_state *fxsave;
9392
9393 vcpu_load(vcpu);
9394
9395 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
9396
9397 memcpy(fxsave->st_space, fpu->fpr, 128);
9398 fxsave->cwd = fpu->fcw;
9399 fxsave->swd = fpu->fsw;
9400 fxsave->twd = fpu->ftwx;
9401 fxsave->fop = fpu->last_opcode;
9402 fxsave->rip = fpu->last_ip;
9403 fxsave->rdp = fpu->last_dp;
9404 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
9405
9406 vcpu_put(vcpu);
9407 return 0;
9408}
9409
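/*
 * KVM_CAP_SYNC_REGS support: store_regs() copies the register classes
 * requested in kvm_run->kvm_valid_regs out to the shared kvm_run area after
 * running, while sync_regs() pulls the classes marked in
 * kvm_run->kvm_dirty_regs back into the vCPU before entering the guest.
 */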
9410static void store_regs(struct kvm_vcpu *vcpu)
9411{
9412 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
9413
9414 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
9415 __get_regs(vcpu, &vcpu->run->s.regs.regs);
9416
9417 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
9418 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
9419
9420 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
9421 kvm_vcpu_ioctl_x86_get_vcpu_events(
9422 vcpu, &vcpu->run->s.regs.events);
9423}
9424
9425static int sync_regs(struct kvm_vcpu *vcpu)
9426{
9427 if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
9428 return -EINVAL;
9429
9430 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
9431 __set_regs(vcpu, &vcpu->run->s.regs.regs);
9432 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
9433 }
9434 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
9435 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
9436 return -EINVAL;
9437 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
9438 }
9439 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
9440 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
9441 vcpu, &vcpu->run->s.regs.events))
9442 return -EINVAL;
9443 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
9444 }
9445
9446 return 0;
9447}
9448
9449static void fx_init(struct kvm_vcpu *vcpu)
9450{
9451 fpstate_init(&vcpu->arch.guest_fpu->state);
9452 if (boot_cpu_has(X86_FEATURE_XSAVES))
9453 vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
9454 host_xcr0 | XSTATE_COMPACTION_ENABLED;
9455
9456 /*
9457  * Ensure guest xcr0 is valid for loading.
9458  */
9459 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
9460
9461 vcpu->arch.cr0 |= X86_CR0_ET;
9462}
9463
9464int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
9465{
9466 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
9467 pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
9468 "guest TSC will not be reliable\n");
9469
9470 return 0;
9471}
9472
9473int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
9474{
9475 struct page *page;
9476 int r;
9477
9478 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
9479 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
9480 else
9481 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
9482
9483 kvm_set_tsc_khz(vcpu, max_tsc_khz);
9484
9485 r = kvm_mmu_create(vcpu);
9486 if (r < 0)
9487 return r;
9488
9489 if (irqchip_in_kernel(vcpu->kvm)) {
9490 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
9491 if (r < 0)
9492 goto fail_mmu_destroy;
9493 if (kvm_apicv_activated(vcpu->kvm))
9494 vcpu->arch.apicv_active = true;
9495 } else
9496 static_key_slow_inc(&kvm_no_apic_vcpu);
9497
9498 r = -ENOMEM;
9499
9500 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
9501 if (!page)
9502 goto fail_free_lapic;
9503 vcpu->arch.pio_data = page_address(page);
9504
9505 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
9506 GFP_KERNEL_ACCOUNT);
9507 if (!vcpu->arch.mce_banks)
9508 goto fail_free_pio_data;
9509 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
9510
9511 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
9512 GFP_KERNEL_ACCOUNT))
9513 goto fail_free_mce_banks;
9514
9515 if (!alloc_emulate_ctxt(vcpu))
9516 goto free_wbinvd_dirty_mask;
9517
9518 vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
9519 GFP_KERNEL_ACCOUNT);
9520 if (!vcpu->arch.user_fpu) {
9521 pr_err("kvm: failed to allocate userspace's fpu\n");
9522 goto free_emulate_ctxt;
9523 }
9524
9525 vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
9526 GFP_KERNEL_ACCOUNT);
9527 if (!vcpu->arch.guest_fpu) {
9528 pr_err("kvm: failed to allocate vcpu's fpu\n");
9529 goto free_user_fpu;
9530 }
9531 fx_init(vcpu);
9532
9533 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
9534
9535 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
9536
9537 kvm_async_pf_hash_reset(vcpu);
9538 kvm_pmu_init(vcpu);
9539
9540 vcpu->arch.pending_external_vector = -1;
9541 vcpu->arch.preempted_in_kernel = false;
9542
9543 kvm_hv_vcpu_init(vcpu);
9544
9545 r = kvm_x86_ops.vcpu_create(vcpu);
9546 if (r)
9547 goto free_guest_fpu;
9548
9549 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
9550 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
9551 kvm_vcpu_mtrr_init(vcpu);
9552 vcpu_load(vcpu);
9553 kvm_vcpu_reset(vcpu, false);
9554 kvm_init_mmu(vcpu, false);
9555 vcpu_put(vcpu);
9556 return 0;
9557
9558free_guest_fpu:
9559 kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
9560free_user_fpu:
9561 kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
9562free_emulate_ctxt:
9563 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
9564free_wbinvd_dirty_mask:
9565 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
9566fail_free_mce_banks:
9567 kfree(vcpu->arch.mce_banks);
9568fail_free_pio_data:
9569 free_page((unsigned long)vcpu->arch.pio_data);
9570fail_free_lapic:
9571 kvm_free_lapic(vcpu);
9572fail_mmu_destroy:
9573 kvm_mmu_destroy(vcpu);
9574 return r;
9575}
9576
9577void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
9578{
9579 struct msr_data msr;
9580 struct kvm *kvm = vcpu->kvm;
9581
9582 kvm_hv_vcpu_postcreate(vcpu);
9583
9584 if (mutex_lock_killable(&vcpu->mutex))
9585 return;
9586 vcpu_load(vcpu);
9587 msr.data = 0x0;
9588 msr.index = MSR_IA32_TSC;
9589 msr.host_initiated = true;
9590 kvm_write_tsc(vcpu, &msr);
9591 vcpu_put(vcpu);
9592
9593 /* Poll control is enabled by default. */
9594 vcpu->arch.msr_kvm_poll_control = 1;
9595
9596 mutex_unlock(&vcpu->mutex);
9597
9598 if (kvmclock_periodic_sync && vcpu->vcpu_idx == 0)
9599 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
9600 KVMCLOCK_SYNC_PERIOD);
9601}
9602
9603void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
9604{
9605 struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
9606 int idx;
9607
9608 kvm_release_pfn(cache->pfn, cache->dirty, cache);
9609
9610 kvmclock_reset(vcpu);
9611
9612 kvm_x86_ops.vcpu_free(vcpu);
9613
9614 kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
9615 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
9616 kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
9617 kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
9618
9619 kvm_hv_vcpu_uninit(vcpu);
9620 kvm_pmu_destroy(vcpu);
9621 kfree(vcpu->arch.mce_banks);
9622 kvm_free_lapic(vcpu);
9623 idx = srcu_read_lock(&vcpu->kvm->srcu);
9624 kvm_mmu_destroy(vcpu);
9625 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9626 free_page((unsigned long)vcpu->arch.pio_data);
9627 if (!lapic_in_kernel(vcpu))
9628 static_key_slow_dec(&kvm_no_apic_vcpu);
9629}
9630
9631void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
9632{
9633 kvm_lapic_reset(vcpu, init_event);
9634
9635 vcpu->arch.hflags = 0;
9636
9637 vcpu->arch.smi_pending = 0;
9638 vcpu->arch.smi_count = 0;
9639 atomic_set(&vcpu->arch.nmi_queued, 0);
9640 vcpu->arch.nmi_pending = 0;
9641 vcpu->arch.nmi_injected = false;
9642 kvm_clear_interrupt_queue(vcpu);
9643 kvm_clear_exception_queue(vcpu);
9644
9645 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
9646 kvm_update_dr0123(vcpu);
9647 vcpu->arch.dr6 = DR6_INIT;
9648 vcpu->arch.dr7 = DR7_FIXED_1;
9649 kvm_update_dr7(vcpu);
9650
9651 vcpu->arch.cr2 = 0;
9652
9653 kvm_make_request(KVM_REQ_EVENT, vcpu);
9654 vcpu->arch.apf.msr_en_val = 0;
9655 vcpu->arch.apf.msr_int_val = 0;
9656 vcpu->arch.st.msr_val = 0;
9657
9658 kvmclock_reset(vcpu);
9659
9660 kvm_clear_async_pf_completion_queue(vcpu);
9661 kvm_async_pf_hash_reset(vcpu);
9662 vcpu->arch.apf.halted = false;
9663
9664 if (kvm_mpx_supported()) {
9665 void *mpx_state_buffer;
9666
9667 /*
9668  * On an INIT event the guest FPU may currently be loaded; unload it
9669  * so the xsave image can be cleared below, then reload it afterwards.
9670  */
9671 if (init_event)
9672 kvm_put_guest_fpu(vcpu);
9673 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
9674 XFEATURE_BNDREGS);
9675 if (mpx_state_buffer)
9676 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
9677 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
9678 XFEATURE_BNDCSR);
9679 if (mpx_state_buffer)
9680 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
9681 if (init_event)
9682 kvm_load_guest_fpu(vcpu);
9683 }
9684
9685 if (!init_event) {
9686 kvm_pmu_reset(vcpu);
9687 vcpu->arch.smbase = 0x30000;
9688
9689 vcpu->arch.msr_misc_features_enables = 0;
9690
9691 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
9692 }
9693
9694 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
9695 vcpu->arch.regs_avail = ~0;
9696 vcpu->arch.regs_dirty = ~0;
9697
9698 vcpu->arch.ia32_xss = 0;
9699
9700 kvm_x86_ops.vcpu_reset(vcpu, init_event);
9701}
9702
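/*
 * Deliver a startup IPI: as on a physical AP, CS is loaded with the vector
 * in its selector (vector << 8) and base (vector << 12), and execution
 * starts at IP 0 in real mode.
 */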
9703void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
9704{
9705 struct kvm_segment cs;
9706
9707 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
9708 cs.selector = vector << 8;
9709 cs.base = vector << 12;
9710 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
9711 kvm_rip_write(vcpu, 0);
9712}
9713
9714int kvm_arch_hardware_enable(void)
9715{
9716 struct kvm *kvm;
9717 struct kvm_vcpu *vcpu;
9718 int i;
9719 int ret;
9720 u64 local_tsc;
9721 u64 max_tsc = 0;
9722 bool stable, backwards_tsc = false;
9723
9724 kvm_shared_msr_cpu_online();
9725 ret = kvm_x86_ops.hardware_enable();
9726 if (ret != 0)
9727 return ret;
9728
9729 local_tsc = rdtsc();
9730 stable = !kvm_check_tsc_unstable();
9731 list_for_each_entry(kvm, &vm_list, vm_list) {
9732 kvm_for_each_vcpu(i, vcpu, kvm) {
9733 if (!stable && vcpu->cpu == smp_processor_id())
9734 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
9735 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
9736 backwards_tsc = true;
9737 if (vcpu->arch.last_host_tsc > max_tsc)
9738 max_tsc = vcpu->arch.last_host_tsc;
9739 }
9740 }
9741 }
9742
9743 /*
9744  * Even otherwise reliable TSCs can go backwards across host suspend or
9745  * hibernate, because firmware may reset the counter while the CPUs stay
9746  * synchronized.  Detect that here, early in CPU bringup before any KVM
9747  * threads run, by comparing the freshly read TSC with the last host TSC
9748  * observed by each vCPU.
9749  *
9750  * The real time that elapsed during the suspend cannot be recovered at
9751  * this point, so simply take the maximum observed delta and accumulate
9752  * it into every vCPU's tsc_offset_adjustment; the adjustment is applied
9753  * the next time the vCPU is loaded.  Accumulating handles multiple
9754  * suspend cycles occurring before a vCPU gets to run again.  An
9755  * unstable TSC is compensated for separately when the vCPU is loaded.
9756  */
9781 if (backwards_tsc) {
9782 u64 delta_cyc = max_tsc - local_tsc;
9783 list_for_each_entry(kvm, &vm_list, vm_list) {
9784 kvm->arch.backwards_tsc_observed = true;
9785 kvm_for_each_vcpu(i, vcpu, kvm) {
9786 vcpu->arch.tsc_offset_adjustment += delta_cyc;
9787 vcpu->arch.last_host_tsc = local_tsc;
9788 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
9789 }
9790
9791 /*
9792  * Also clear the TSC-write matching state: values recorded before
9793  * the suspend must not be matched against guest TSC writes that
9794  * happen after the adjustment above.
9795  */
9797 kvm->arch.last_tsc_nsec = 0;
9798 kvm->arch.last_tsc_write = 0;
9799 }
9800
9801 }
9802 return 0;
9803}
9804
9805void kvm_arch_hardware_disable(void)
9806{
9807 kvm_x86_ops.hardware_disable();
9808 drop_user_return_notifiers();
9809}
9810
9811int kvm_arch_hardware_setup(void *opaque)
9812{
9813 struct kvm_x86_init_ops *ops = opaque;
9814 int r;
9815
9816 rdmsrl_safe(MSR_EFER, &host_efer);
9817
9818 if (boot_cpu_has(X86_FEATURE_XSAVES))
9819 rdmsrl(MSR_IA32_XSS, host_xss);
9820
9821 r = ops->hardware_setup();
9822 if (r != 0)
9823 return r;
9824
9825 memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
9826
9827 if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
9828 supported_xss = 0;
9829
9830#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
9831 cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
9832#undef __kvm_cpu_cap_has
9833
9834 if (kvm_has_tsc_control) {
9835 /*
9836  * Make sure the user can only configure tsc_khz values that
9837  * fit into a signed integer.
9838  * A minimum value is not calculated because it will always
9839  * be 1 on all machines.
9840  */
9841 u64 max = min(0x7fffffffULL,
9842 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
9843 kvm_max_guest_tsc_khz = max;
9844
9845 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
9846 }
9847
9848 kvm_init_msr_list();
9849 return 0;
9850}
9851
9852void kvm_arch_hardware_unsetup(void)
9853{
9854 kvm_x86_ops.hardware_unsetup();
9855}
9856
9857int kvm_arch_check_processor_compat(void *opaque)
9858{
9859 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
9860 struct kvm_x86_init_ops *ops = opaque;
9861
9862 WARN_ON(!irqs_disabled());
9863
9864 if (__cr4_reserved_bits(cpu_has, c) !=
9865 __cr4_reserved_bits(cpu_has, &boot_cpu_data))
9866 return -EIO;
9867
9868 return ops->check_processor_compatibility();
9869}
9870
9871bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
9872{
9873 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
9874}
9875EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
9876
9877bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
9878{
9879 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
9880}
9881
9882struct static_key kvm_no_apic_vcpu __read_mostly;
9883EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
9884
9885void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
9886{
9887 struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
9888
9889 vcpu->arch.l1tf_flush_l1d = true;
9890 if (pmu->version && unlikely(pmu->event_count)) {
9891 pmu->need_cleanup = true;
9892 kvm_make_request(KVM_REQ_PMU, vcpu);
9893 }
9894 kvm_x86_ops.sched_in(vcpu, cpu);
9895}
9896
9897void kvm_arch_free_vm(struct kvm *kvm)
9898{
9899 kfree(kvm->arch.hyperv.hv_pa_pg);
9900 vfree(kvm);
9901}
9902
9903
9904int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
9905{
9906 if (type)
9907 return -EINVAL;
9908
9909 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
9910 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
9911 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
9912 INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
9913 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
9914 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
9915
9916 /* Reserve bit 0 of irq_sources_bitmap for the userspace irq source. */
9917 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
9918 /* Reserve bit 1 of irq_sources_bitmap for the irqfd resampler. */
9919 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
9920 &kvm->arch.irq_sources_bitmap);
9921
9922 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
9923 mutex_init(&kvm->arch.apic_map_lock);
9924 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
9925
9926 kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
9927 pvclock_update_vm_gtod_copy(kvm);
9928
9929 kvm->arch.guest_can_read_msr_platform_info = true;
9930
9931 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
9932 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
9933
9934 kvm_hv_init_vm(kvm);
9935 kvm_page_track_init(kvm);
9936 kvm_mmu_init_vm(kvm);
9937
9938 return kvm_x86_ops.vm_init(kvm);
9939}
9940
9941int kvm_arch_post_init_vm(struct kvm *kvm)
9942{
9943 return kvm_mmu_post_init_vm(kvm);
9944}
9945
9946static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
9947{
9948 vcpu_load(vcpu);
9949 kvm_mmu_unload(vcpu);
9950 vcpu_put(vcpu);
9951}
9952
9953static void kvm_free_vcpus(struct kvm *kvm)
9954{
9955 unsigned int i;
9956 struct kvm_vcpu *vcpu;
9957
9958 /*
9959  * Unpin any mmu pages first.
9960  */
9961 kvm_for_each_vcpu(i, vcpu, kvm) {
9962 kvm_clear_async_pf_completion_queue(vcpu);
9963 kvm_unload_vcpu_mmu(vcpu);
9964 }
9965 kvm_for_each_vcpu(i, vcpu, kvm)
9966 kvm_vcpu_destroy(vcpu);
9967
9968 mutex_lock(&kvm->lock);
9969 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
9970 kvm->vcpus[i] = NULL;
9971
9972 atomic_set(&kvm->online_vcpus, 0);
9973 mutex_unlock(&kvm->lock);
9974}
9975
9976void kvm_arch_sync_events(struct kvm *kvm)
9977{
9978 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
9979 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
9980 kvm_free_pit(kvm);
9981}
9982
9983int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
9984{
9985 int i, r;
9986 unsigned long hva, old_npages;
9987 struct kvm_memslots *slots = kvm_memslots(kvm);
9988 struct kvm_memory_slot *slot;
9989
9990 /* Called with kvm->slots_lock held. */
9991 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
9992 return -EINVAL;
9993
9994 slot = id_to_memslot(slots, id);
9995 if (size) {
9996 if (slot && slot->npages)
9997 return -EEXIST;
9998
9999 /*
10000  * MAP_SHARED to prevent internal slot pages from being moved
10001  * by fork()/COW.
10002  */
10003 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
10004 MAP_SHARED | MAP_ANONYMOUS, 0);
10005 if (IS_ERR((void *)hva))
10006 return PTR_ERR((void *)hva);
10007 } else {
10008 if (!slot || !slot->npages)
10009 return 0;
10010
10011 old_npages = slot->npages;
10012 hva = 0;
10013 }
10014
10015 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
10016 struct kvm_userspace_memory_region m;
10017
10018 m.slot = id | (i << 16);
10019 m.flags = 0;
10020 m.guest_phys_addr = gpa;
10021 m.userspace_addr = hva;
10022 m.memory_size = size;
10023 r = __kvm_set_memory_region(kvm, &m);
10024 if (r < 0)
10025 return r;
10026 }
10027
10028 if (!size)
10029 vm_munmap(hva, old_npages * PAGE_SIZE);
10030
10031 return 0;
10032}
10033EXPORT_SYMBOL_GPL(__x86_set_memory_region);
10034
10035void kvm_arch_pre_destroy_vm(struct kvm *kvm)
10036{
10037 kvm_mmu_pre_destroy_vm(kvm);
10038}
10039
10040void kvm_arch_destroy_vm(struct kvm *kvm)
10041{
10042 if (current->mm == kvm->mm) {
10043 /*
10044  * Free memory regions allocated on behalf of userspace,
10045  * unless the memory map has changed due to process exit
10046  * or fd copying.
10047  */
10048 mutex_lock(&kvm->slots_lock);
10049 __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
10050 0, 0);
10051 __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
10052 0, 0);
10053 __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
10054 mutex_unlock(&kvm->slots_lock);
10055 }
10056 if (kvm_x86_ops.vm_destroy)
10057 kvm_x86_ops.vm_destroy(kvm);
10058 kvm_pic_destroy(kvm);
10059 kvm_ioapic_destroy(kvm);
10060 kvm_free_vcpus(kvm);
10061 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
10062 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
10063 kvm_mmu_uninit_vm(kvm);
10064 kvm_page_track_cleanup(kvm);
10065 kvm_hv_destroy_vm(kvm);
10066}
10067
10068void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
10069{
10070 int i;
10071
10072 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
10073 kvfree(slot->arch.rmap[i]);
10074 slot->arch.rmap[i] = NULL;
10075
10076 if (i == 0)
10077 continue;
10078
10079 kvfree(slot->arch.lpage_info[i - 1]);
10080 slot->arch.lpage_info[i - 1] = NULL;
10081 }
10082
10083 kvm_page_track_free_memslot(slot);
10084}
10085
10086static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
10087 unsigned long npages)
10088{
10089 int i;
10090
10091 /*
10092  * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
10093  * old arrays will be freed by __kvm_set_memory_region() if installing
10094  * the new memslot is successful.
10095  */
10096 memset(&slot->arch, 0, sizeof(slot->arch));
10097
10098 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
10099 struct kvm_lpage_info *linfo;
10100 unsigned long ugfn;
10101 int lpages;
10102 int level = i + 1;
10103
10104 lpages = gfn_to_index(slot->base_gfn + npages - 1,
10105 slot->base_gfn, level) + 1;
10106
10107 slot->arch.rmap[i] =
10108 kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
10109 GFP_KERNEL_ACCOUNT);
10110 if (!slot->arch.rmap[i])
10111 goto out_free;
10112 if (i == 0)
10113 continue;
10114
10115 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
10116 if (!linfo)
10117 goto out_free;
10118
10119 slot->arch.lpage_info[i - 1] = linfo;
10120
10121 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
10122 linfo[0].disallow_lpage = 1;
10123 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
10124 linfo[lpages - 1].disallow_lpage = 1;
10125 ugfn = slot->userspace_addr >> PAGE_SHIFT;
10126 /*
10127  * If the gfn and userspace address are not aligned wrt each
10128  * other, disable large page support for this slot.
10129  */
10130 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1)) {
10131 unsigned long j;
10132
10133 for (j = 0; j < lpages; ++j)
10134 linfo[j].disallow_lpage = 1;
10135 }
10136 }
10137
10138 if (kvm_page_track_create_memslot(slot, npages))
10139 goto out_free;
10140
10141 return 0;
10142
10143out_free:
10144 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
10145 kvfree(slot->arch.rmap[i]);
10146 slot->arch.rmap[i] = NULL;
10147 if (i == 0)
10148 continue;
10149
10150 kvfree(slot->arch.lpage_info[i - 1]);
10151 slot->arch.lpage_info[i - 1] = NULL;
10152 }
10153 return -ENOMEM;
10154}
10155
10156void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
10157{
10158 struct kvm_vcpu *vcpu;
10159 int i;
10160
10161 /*
10162  * memslots->generation has been incremented; the mmio generation
10163  * may have reached its maximum value and must be invalidated.
10164  */
10165 kvm_mmu_invalidate_mmio_sptes(kvm, gen);
10166
10167 /* Force re-initialization of the steal_time cache. */
10168 kvm_for_each_vcpu(i, vcpu, kvm)
10169 kvm_vcpu_kick(vcpu);
10170}
10171
10172int kvm_arch_prepare_memory_region(struct kvm *kvm,
10173 struct kvm_memory_slot *memslot,
10174 const struct kvm_userspace_memory_region *mem,
10175 enum kvm_mr_change change)
10176{
10177 if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
10178 return kvm_alloc_memslot_metadata(memslot,
10179 mem->memory_size >> PAGE_SHIFT);
10180 return 0;
10181}
10182
10183static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
10184 struct kvm_memory_slot *old,
10185 struct kvm_memory_slot *new,
10186 enum kvm_mr_change change)
10187{
10188 /*
10189  * Nothing to do for read-only slots (which are never dirty logged)
10190  * or for anything other than a pure flags change; see the comments
10191  * below.
10192 if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
10193 return;
10194
10195 /*
10196  * Dirty logging tracks sptes at 4k granularity, meaning that large
10197  * sptes have to be split.  If live migration succeeds, the guest in
10198  * the source machine will be destroyed and large sptes will be
10199  * created in the destination.  However, if the guest continues to
10200  * run on the source machine (for example if live migration fails),
10201  * small sptes will remain around and cause bad performance.
10202  *
10203  * Scan the sptes if dirty logging has been stopped, dropping those
10204  * which can be collapsed into a single large-page spte.  Later
10205  * page faults will create the large-page sptes.
10206  *
10207  * There is no need to do this in any of the following cases:
10208  * CREATE:      no dirty mappings will already exist.
10209  * MOVE/DELETE: the old mappings will already have been cleaned up
10210  *              by kvm_arch_flush_shadow_memslot().
10211  */
10212 if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
10213 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
10214 kvm_mmu_zap_collapsible_sptes(kvm, new);
10215
10216 /*
10217  * Enable or disable dirty logging for the slot.
10218  *
10219  * For KVM_MR_DELETE and KVM_MR_MOVE the shadow pages of the old slot
10220  * have already been zapped, so no dirty-logging update is needed for
10221  * the old slot.  For KVM_MR_CREATE and KVM_MR_MOVE, any mapping
10222  * created once the new slot is visible already picks up the new
10223  * slot's properties, so only a pure flags change reaches this point.
10224  *
10225  * When the vendor module provides slot_enable_log_dirty /
10226  * slot_disable_log_dirty hooks (e.g. for PML), those are used;
10227  * otherwise write access is removed from the slot so that guest
10228  * writes fault and can be logged.
10229  */
10244 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
10245 if (kvm_x86_ops.slot_enable_log_dirty) {
10246 kvm_x86_ops.slot_enable_log_dirty(kvm, new);
10247 } else {
10248 int level =
10249 kvm_dirty_log_manual_protect_and_init_set(kvm) ?
10250 PG_LEVEL_2M : PG_LEVEL_4K;
10251
10252 /*
10253  * If we're with initial-all-set, we don't need
10254  * to write protect any small page because
10255  * they're reported as dirty already.  However
10256  * we still need to write-protect huge pages
10257  * so that the page split can happen lazily on
10258  * the first write to the huge page.
10259  */
10260 kvm_mmu_slot_remove_write_access(kvm, new, level);
10261 }
10262 } else {
10263 if (kvm_x86_ops.slot_disable_log_dirty)
10264 kvm_x86_ops.slot_disable_log_dirty(kvm, new);
10265 }
10266}
10267
10268void kvm_arch_commit_memory_region(struct kvm *kvm,
10269 const struct kvm_userspace_memory_region *mem,
10270 struct kvm_memory_slot *old,
10271 const struct kvm_memory_slot *new,
10272 enum kvm_mr_change change)
10273{
10274 if (!kvm->arch.n_requested_mmu_pages)
10275 kvm_mmu_change_mmu_pages(kvm,
10276 kvm_mmu_calculate_default_mmu_pages(kvm));
10277
10278 /*
10279  * The cast below drops const; uses of struct kvm_memory_slot are
10280  * not yet const-ified throughout the MMU code.
10281 kvm_mmu_slot_apply_flags(kvm, old, (struct kvm_memory_slot *) new, change);
10282
10283 /* Free the arrays associated with the old memslot. */
10284 if (change == KVM_MR_MOVE)
10285 kvm_arch_free_memslot(kvm, old);
10286}
10287
10288void kvm_arch_flush_shadow_all(struct kvm *kvm)
10289{
10290 kvm_mmu_zap_all(kvm);
10291}
10292
10293void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
10294 struct kvm_memory_slot *slot)
10295{
10296 kvm_page_track_flush_slot(kvm, slot);
10297}
10298
10299static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
10300{
10301 return (is_guest_mode(vcpu) &&
10302 kvm_x86_ops.guest_apic_has_interrupt &&
10303 kvm_x86_ops.guest_apic_has_interrupt(vcpu));
10304}
10305
10306static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
10307{
10308 if (!list_empty_careful(&vcpu->async_pf.done))
10309 return true;
10310
10311 if (kvm_apic_has_events(vcpu))
10312 return true;
10313
10314 if (vcpu->arch.pv.pv_unhalted)
10315 return true;
10316
10317 if (vcpu->arch.exception.pending)
10318 return true;
10319
10320 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
10321 (vcpu->arch.nmi_pending &&
10322 kvm_x86_ops.nmi_allowed(vcpu, false)))
10323 return true;
10324
10325 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
10326 (vcpu->arch.smi_pending &&
10327 kvm_x86_ops.smi_allowed(vcpu, false)))
10328 return true;
10329
10330 if (kvm_arch_interrupt_allowed(vcpu) &&
10331 (kvm_cpu_has_interrupt(vcpu) ||
10332 kvm_guest_apic_has_interrupt(vcpu)))
10333 return true;
10334
10335 if (kvm_hv_has_stimer_pending(vcpu))
10336 return true;
10337
10338 if (is_guest_mode(vcpu) &&
10339 kvm_x86_ops.nested_ops->hv_timer_pending &&
10340 kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
10341 return true;
10342
10343 return false;
10344}
10345
10346int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
10347{
10348 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
10349}
10350
10351bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
10352{
10353 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
10354 return true;
10355
10356 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
10357 kvm_test_request(KVM_REQ_SMI, vcpu) ||
10358 kvm_test_request(KVM_REQ_EVENT, vcpu))
10359 return true;
10360
10361 if (vcpu->arch.apicv_active && kvm_x86_ops.dy_apicv_has_pending_interrupt(vcpu))
10362 return true;
10363
10364 return false;
10365}
10366
10367bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
10368{
10369 return vcpu->arch.preempted_in_kernel;
10370}
10371
10372int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
10373{
10374 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
10375}
10376
10377int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
10378{
10379 return kvm_x86_ops.interrupt_allowed(vcpu, false);
10380}
10381
10382unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
10383{
10384 if (is_64_bit_mode(vcpu))
10385 return kvm_rip_read(vcpu);
10386 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
10387 kvm_rip_read(vcpu));
10388}
10389EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
10390
10391bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
10392{
10393 return kvm_get_linear_rip(vcpu) == linear_rip;
10394}
10395EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
10396
10397unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
10398{
10399 unsigned long rflags;
10400
10401 rflags = kvm_x86_ops.get_rflags(vcpu);
10402 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10403 rflags &= ~X86_EFLAGS_TF;
10404 return rflags;
10405}
10406EXPORT_SYMBOL_GPL(kvm_get_rflags);
10407
10408static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
10409{
10410 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
10411 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
10412 rflags |= X86_EFLAGS_TF;
10413 kvm_x86_ops.set_rflags(vcpu, rflags);
10414}
10415
10416void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
10417{
10418 __kvm_set_rflags(vcpu, rflags);
10419 kvm_make_request(KVM_REQ_EVENT, vcpu);
10420}
10421EXPORT_SYMBOL_GPL(kvm_set_rflags);
10422
10423void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
10424{
10425 int r;
10426
10427 if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
10428 work->wakeup_all)
10429 return;
10430
10431 r = kvm_mmu_reload(vcpu);
10432 if (unlikely(r))
10433 return;
10434
10435 if (!vcpu->arch.mmu->direct_map &&
10436 work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
10437 return;
10438
10439 kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
10440}
10441
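/*
 * Outstanding async page faults are tracked per-gfn in a small
 * open-addressed hash table (vcpu->arch.apf.gfns).  The table size is a
 * power of two, empty slots hold ~0, and collisions are resolved by linear
 * probing; deletion re-packs the probe sequence so that later lookups never
 * hit a false empty slot.
 */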
10442static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
10443{
10444 BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU));
10445
10446 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
10447}
10448
10449static inline u32 kvm_async_pf_next_probe(u32 key)
10450{
10451 return (key + 1) & (ASYNC_PF_PER_VCPU - 1);
10452}
10453
10454static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10455{
10456 u32 key = kvm_async_pf_hash_fn(gfn);
10457
10458 while (vcpu->arch.apf.gfns[key] != ~0)
10459 key = kvm_async_pf_next_probe(key);
10460
10461 vcpu->arch.apf.gfns[key] = gfn;
10462}
10463
10464static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
10465{
10466 int i;
10467 u32 key = kvm_async_pf_hash_fn(gfn);
10468
10469 for (i = 0; i < ASYNC_PF_PER_VCPU &&
10470 (vcpu->arch.apf.gfns[key] != gfn &&
10471 vcpu->arch.apf.gfns[key] != ~0); i++)
10472 key = kvm_async_pf_next_probe(key);
10473
10474 return key;
10475}
10476
10477bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10478{
10479 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
10480}
10481
10482static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10483{
10484 u32 i, j, k;
10485
10486 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
10487
10488 if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn))
10489 return;
10490
10491 while (true) {
10492 vcpu->arch.apf.gfns[i] = ~0;
10493 do {
10494 j = kvm_async_pf_next_probe(j);
10495 if (vcpu->arch.apf.gfns[j] == ~0)
10496 return;
10497 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
10498 /*
10499  * k lies cyclically in ]i,j]
10500  * |    i.k.j |
10501  * |....j i.k.| or  |.k..j i...|
10502  */
10503 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
10504 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
10505 i = j;
10506 }
10507}
10508
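/*
 * Helpers for the shared struct kvm_vcpu_pv_apf_data area registered by the
 * guest: apf_put_user_notpresent() writes the "page not present" reason,
 * apf_put_user_ready() writes a "page ready" token, and
 * apf_pageready_slot_free() checks that the previous token was consumed.
 */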
10509static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu)
10510{
10511 u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT;
10512
10513 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason,
10514 sizeof(reason));
10515}
10516
10517static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token)
10518{
10519 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
10520
10521 return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
10522 &token, offset, sizeof(token));
10523}
10524
10525static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu)
10526{
10527 unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token);
10528 u32 val;
10529
10530 if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data,
10531 &val, offset, sizeof(val)))
10532 return false;
10533
10534 return !val;
10535}
10536
10537static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
10538{
10539 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
10540 return false;
10541
10542 if (!kvm_pv_async_pf_enabled(vcpu) ||
10543 (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
10544 return false;
10545
10546 return true;
10547}
10548
10549bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
10550{
10551 if (unlikely(!lapic_in_kernel(vcpu) ||
10552 kvm_event_needs_reinjection(vcpu) ||
10553 vcpu->arch.exception.pending))
10554 return false;
10555
10556 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
10557 return false;
10558
10559 /*
10560  * If interrupts are off we cannot even use an artificial
10561  * halt state.
10562  */
10563 return kvm_arch_interrupt_allowed(vcpu);
10564}
10565
10566bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
10567 struct kvm_async_pf *work)
10568{
10569 struct x86_exception fault;
10570
10571 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
10572 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
10573
10574 if (kvm_can_deliver_async_pf(vcpu) &&
10575 !apf_put_user_notpresent(vcpu)) {
10576 fault.vector = PF_VECTOR;
10577 fault.error_code_valid = true;
10578 fault.error_code = 0;
10579 fault.nested_page_fault = false;
10580 fault.address = work->arch.token;
10581 fault.async_page_fault = true;
10582 kvm_inject_page_fault(vcpu, &fault);
10583 return true;
10584 } else {
10585 /*
10586  * A paravirtualized async page fault cannot be delivered right
10587  * now.  Putting the guest into an artificial halt state is still
10588  * beneficial: if an interrupt arrives it can be delivered
10589  * promptly, and in any case halting keeps the vCPU from spinning
10590  * on the missing page until the host has brought it in.
10591  */
10593 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
10594 return false;
10595 }
10596}
10597
10598void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
10599 struct kvm_async_pf *work)
10600{
10601 struct kvm_lapic_irq irq = {
10602 .delivery_mode = APIC_DM_FIXED,
10603 .vector = vcpu->arch.apf.vec
10604 };
10605
10606 if (work->wakeup_all)
10607 work->arch.token = ~0;
10608 else
10609 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
10610 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
10611
10612 if ((work->wakeup_all || work->notpresent_injected) &&
10613 kvm_pv_async_pf_enabled(vcpu) &&
10614 !apf_put_user_ready(vcpu, work->arch.token)) {
10615 vcpu->arch.apf.pageready_pending = true;
10616 kvm_apic_set_irq(vcpu, &irq, NULL);
10617 }
10618
10619 vcpu->arch.apf.halted = false;
10620 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10621}
10622
10623void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu)
10624{
10625 kvm_make_request(KVM_REQ_APF_READY, vcpu);
10626 if (!vcpu->arch.apf.pageready_pending)
10627 kvm_vcpu_kick(vcpu);
10628}
10629
10630bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
10631{
10632 if (!kvm_pv_async_pf_enabled(vcpu))
10633 return true;
10634 else
10635 return apf_pageready_slot_free(vcpu);
10636}
10637
10638void kvm_arch_start_assignment(struct kvm *kvm)
10639{
10640 atomic_inc(&kvm->arch.assigned_device_count);
10641}
10642EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
10643
10644void kvm_arch_end_assignment(struct kvm *kvm)
10645{
10646 atomic_dec(&kvm->arch.assigned_device_count);
10647}
10648EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
10649
10650bool kvm_arch_has_assigned_device(struct kvm *kvm)
10651{
10652 return atomic_read(&kvm->arch.assigned_device_count);
10653}
10654EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
10655
10656void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
10657{
10658 atomic_inc(&kvm->arch.noncoherent_dma_count);
10659}
10660EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
10661
10662void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
10663{
10664 atomic_dec(&kvm->arch.noncoherent_dma_count);
10665}
10666EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
10667
10668bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
10669{
10670 return atomic_read(&kvm->arch.noncoherent_dma_count);
10671}
10672EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
10673
10674bool kvm_arch_has_irq_bypass(void)
10675{
10676 return true;
10677}
10678
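/*
 * IRQ bypass (e.g. VT-d posted interrupts or AMD AVIC): when a producer such
 * as a VFIO-assigned device attaches to an irqfd consumer, the IRTE is
 * updated so the interrupt is posted directly to the guest instead of going
 * through the host.
 */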
10679int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
10680 struct irq_bypass_producer *prod)
10681{
10682 struct kvm_kernel_irqfd *irqfd =
10683 container_of(cons, struct kvm_kernel_irqfd, consumer);
10684 int ret;
10685
10686 irqfd->producer = prod;
10687 kvm_arch_start_assignment(irqfd->kvm);
10688 ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
10689 prod->irq, irqfd->gsi, 1);
10690
10691 if (ret)
10692 kvm_arch_end_assignment(irqfd->kvm);
10693
10694 return ret;
10695}
10696
10697void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
10698 struct irq_bypass_producer *prod)
10699{
10700 int ret;
10701 struct kvm_kernel_irqfd *irqfd =
10702 container_of(cons, struct kvm_kernel_irqfd, consumer);
10703
10704 WARN_ON(irqfd->producer != prod);
10705 irqfd->producer = NULL;
10706
10707 /*
10708  * When the producer unregisters, switch the IRTE back to remapped
10709  * mode so the existing non-bypass path keeps working when the irq
10710  * is masked/disabled or the consumer (KVM in this case) no longer
10711  * wants to receive the interrupts.
10712  */
10713 ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
10714 if (ret)
10715 printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
10716 " fails: %d\n", irqfd->consumer.token, ret);
10717
10718 kvm_arch_end_assignment(irqfd->kvm);
10719}
10720
10721int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
10722 uint32_t guest_irq, bool set)
10723{
10724 return kvm_x86_ops.update_pi_irte(kvm, host_irq, guest_irq, set);
10725}
10726
10727bool kvm_vector_hashing_enabled(void)
10728{
10729 return vector_hashing;
10730}
10731
10732bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
10733{
10734 return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
10735}
10736EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
10737
10738
10739int kvm_spec_ctrl_test_value(u64 value)
10740{
10741 /*
10742  * Test that setting IA32_SPEC_CTRL to the given value
10743  * is allowed by the host processor.
10744  */
10745
10746 u64 saved_value;
10747 unsigned long flags;
10748 int ret = 0;
10749
10750 local_irq_save(flags);
10751
10752 if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value))
10753 ret = 1;
10754 else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value))
10755 ret = 1;
10756 else
10757 wrmsrl(MSR_IA32_SPEC_CTRL, saved_value);
10758
10759 local_irq_restore(flags);
10760
10761 return ret;
10762}
10763EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
10764
10765void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
10766{
10767 struct x86_exception fault;
10768 u32 access = error_code &
10769 (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
10770
10771 if (!(error_code & PFERR_PRESENT_MASK) ||
10772 vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
10773 /*
10774  * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
10775  * tables probably do not match the TLB.  Just proceed
10776  * with the error code that the processor gave.
10777  */
10778 fault.vector = PF_VECTOR;
10779 fault.error_code_valid = true;
10780 fault.error_code = error_code;
10781 fault.nested_page_fault = false;
10782 fault.address = gva;
10783 }
10784 vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault);
10785}
10786EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error);
10787
10788EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
10789EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
10790EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
10791EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
10792EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
10793EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
10794EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
10795EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
10796EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
10797EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
10798EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
10799EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
10800EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
10801EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
10802EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
10803EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
10804EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
10805EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
10806EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
10807EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
10808EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
10809EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
10810