#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
#include "hyperv.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>

#include <trace/events/kvm.h>

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mce.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
#include <clocksource/hyperv_timer.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/*
 * EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static u64 __read_mostly efer_reserved_bits =
	~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
				    KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
static int sync_regs(struct kvm_vcpu *vcpu);

struct kvm_x86_ops *kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

static bool __read_mostly ignore_msrs = false;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

static bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);

unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO);

bool __read_mostly kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 __read_mostly kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
u64 __read_mostly kvm_max_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
u64 __read_mostly kvm_default_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);

/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

/*
 * lapic timer advance (tscdeadline mode only) in nanoseconds.  '-1' enables
 * adaptive tuning starting from a default advancement of 1000ns.  '0'
 * disables advancement entirely.  Any other value is used as-is and disables
 * adaptive tuning, i.e. allows privileged userspace to set an exact
 * advancement time.
 */
static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);

static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);

bool __read_mostly enable_vmware_backdoor = false;
module_param(enable_vmware_backdoor, bool, S_IRUGO);
EXPORT_SYMBOL_GPL(enable_vmware_backdoor);

static bool __read_mostly force_emulation_prefix = false;
module_param(force_emulation_prefix, bool, S_IRUGO);

int __read_mostly pi_inject_timer = -1;
module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
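
/*
 * "Shared" (user-return) MSRs: guest values for these MSRs may be left
 * live in hardware across vmexits; the host values are restored only
 * when the CPU returns to userspace, via the user-return notifier
 * registered in kvm_set_shared_msr().  This avoids an MSR switch on
 * every vmexit for MSRs the kernel does not consume while in ring 0.
 */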
#define KVM_NR_SHARED_MSRS 16

struct kvm_shared_msrs_global {
	int nr;
	u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_shared_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "req_event", VCPU_STAT(req_event) },
	{ "l1d_flush", VCPU_STAT(l1d_flush) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ "max_mmu_page_hash_collisions",
		VM_STAT(max_mmu_page_hash_collisions) },
	{ NULL }
};

u64 __read_mostly host_xcr0;

struct kmem_cache *x86_fpu_cache;
EXPORT_SYMBOL_GPL(x86_fpu_cache);

static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
		vcpu->arch.apf.gfns[i] = ~0;
}
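
/*
 * Restore the host values of all shared MSRs on return to userspace.
 * Runs from the user-return notifier, and is also called directly from
 * drop_user_return_notifiers() when a CPU is being taken down.
 */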
static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
	struct kvm_shared_msrs *locals
		= container_of(urn, struct kvm_shared_msrs, urn);
	struct kvm_shared_msr_values *values;
	unsigned long flags;

	/*
	 * Disable interrupts while touching the notifier state, since this
	 * path can also be reached from kvm_arch_hardware_disable().
	 */
	local_irq_save(flags);
	if (locals->registered) {
		locals->registered = false;
		user_return_notifier_unregister(urn);
	}
	local_irq_restore(flags);
	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
		values = &locals->values[slot];
		if (values->host != values->curr) {
			wrmsrl(shared_msrs_global.msrs[slot], values->host);
			values->curr = values->host;
		}
	}
}

static void shared_msr_update(unsigned slot, u32 msr)
{
	u64 value;
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	/*
	 * shared_msrs_global is only read here; nobody should be modifying
	 * it at this point, so no locking is needed.
	 */
	if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!");
		return;
	}
	rdmsrl_safe(msr, &value);
	smsr->values[slot].host = value;
	smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	BUG_ON(slot >= KVM_NR_SHARED_MSRS);
	shared_msrs_global.msrs[slot] = msr;
	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
	unsigned i;

	for (i = 0; i < shared_msrs_global.nr; ++i)
		shared_msr_update(i, shared_msrs_global.msrs[i]);
}

int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
	int err;

	if (((value ^ smsr->values[slot].curr) & mask) == 0)
		return 0;
	smsr->values[slot].curr = value;
	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
	if (err)
		return 1;

	if (!smsr->registered) {
		smsr->urn.on_user_return = kvm_on_user_return;
		user_return_notifier_register(&smsr->urn);
		smsr->registered = true;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
{
	return kvm_apic_mode(kvm_get_apic_base(vcpu));
}
EXPORT_SYMBOL_GPL(kvm_get_apic_mode);

int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
	enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
		(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);

	if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
		return 1;
	if (!msr_info->host_initiated) {
		if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
			return 1;
		if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
			return 1;
	}

	kvm_lapic_set_base(vcpu, msr_info->data);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

asmlinkage __visible void kvm_spurious_fault(void)
{
	/* Fault while not rebooting.  We want the trace. */
	BUG();
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);

#define EXCPT_BENIGN		0
#define EXCPT_CONTRIBUTORY	1
#define EXCPT_PF		2

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

#define EXCPT_FAULT		0
#define EXCPT_TRAP		1
#define EXCPT_ABORT		2
#define EXCPT_INTERRUPT		3

static int exception_type(int vector)
{
	unsigned int mask;

	if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
		return EXCPT_INTERRUPT;

	mask = 1 << vector;

	/* #DB is trap, as instruction watchpoints are handled elsewhere */
	if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
		return EXCPT_TRAP;

	if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
		return EXCPT_ABORT;

	/* Reserved exceptions will result in fault */
	return EXCPT_FAULT;
}

void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
{
	unsigned nr = vcpu->arch.exception.nr;
	bool has_payload = vcpu->arch.exception.has_payload;
	unsigned long payload = vcpu->arch.exception.payload;

	if (!has_payload)
		return;

	switch (nr) {
	case DB_VECTOR:
		/*
		 * "Certain debug exceptions may clear bits 0-3.  The
		 * remaining contents of the DR6 register are never
		 * cleared by the processor."
		 */
		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
		/*
		 * DR6.RTM is set by all #DB exceptions that don't clear it.
		 */
		vcpu->arch.dr6 |= DR6_RTM;
		vcpu->arch.dr6 |= payload;
		/*
		 * Bit 16 should be set in the payload whenever the #DB
		 * exception should clear DR6.RTM.  The payload encodes
		 * this bit active-low, matching the pending debug
		 * exceptions field and the #DB exit qualification under
		 * VMX, hence the XOR rather than an OR.
		 */
		vcpu->arch.dr6 ^= payload & DR6_RTM;
		break;
	case PF_VECTOR:
		vcpu->arch.cr2 = payload;
		break;
	}

	vcpu->arch.exception.has_payload = false;
	vcpu->arch.exception.payload = 0;
}
EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
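
/*
 * Queue an exception for delivery to the guest, merging it with any
 * exception that is already pending or injected: a second benign
 * exception simply replaces the first, a contributory exception on top
 * of a contributory one (or any non-benign exception on top of #PF)
 * escalates to #DF, and an exception on top of #DF escalates to a
 * triple fault, i.e. a shutdown request.
 */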
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool has_payload, unsigned long payload, bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
	queue:
		if (has_error && !is_protmode(vcpu))
			has_error = false;
		if (reinject) {
			/*
			 * On vmentry, vcpu->arch.exception.pending is only
			 * true if an event injection was blocked by
			 * nested_run_pending.  In that case, however,
			 * vcpu_enter_guest requests an immediate exit,
			 * and the guest shouldn't proceed far enough to
			 * need reinjection.
			 */
			WARN_ON_ONCE(vcpu->arch.exception.pending);
			vcpu->arch.exception.injected = true;
			if (WARN_ON_ONCE(has_payload)) {
				/*
				 * A reinjected event has already
				 * delivered its payload.
				 */
				has_payload = false;
				payload = 0;
			}
		} else {
			vcpu->arch.exception.pending = true;
			vcpu->arch.exception.injected = false;
		}
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.has_payload = has_payload;
		vcpu->arch.exception.payload = payload;
		/*
		 * In guest mode, payload delivery should be deferred,
		 * so that the L1 hypervisor can intercept #PF before
		 * CR2 is modified (or intercept #DB before DR6 is
		 * modified under nVMX).  However, for ABI compatibility
		 * with KVM_GET_VCPU_EVENTS, delivery may be deferred
		 * only if userspace has enabled the
		 * KVM_CAP_EXCEPTION_PAYLOAD capability.
		 */
		if (!vcpu->kvm->arch.exception_payload_enabled ||
		    !is_guest_mode(vcpu))
			kvm_deliver_exception_payload(vcpu);
		return;
	}

	/* to check exception */
	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/*
		 * Generate double fault per SDM Table 5-5.  Set
		 * exception.pending = true so that the double fault
		 * can trigger a nested vmexit.
		 */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.injected = false;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
		vcpu->arch.exception.has_payload = false;
		vcpu->arch.exception.payload = 0;
	} else
		/*
		 * Replace the previous exception with the new one in the
		 * hope that instruction re-execution will regenerate the
		 * lost exception.
		 */
		goto queue;
}

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
				  unsigned long payload)
{
	kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
}

static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
				    u32 error_code, unsigned long payload)
{
	kvm_multiple_exception(vcpu, nr, true, error_code,
			       true, payload, false);
}

int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err)
		kvm_inject_gp(vcpu, 0);
	else
		return kvm_skip_emulated_instruction(vcpu);

	return 1;
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.exception.nested_apf =
		is_guest_mode(vcpu) && fault->async_page_fault;
	if (vcpu->arch.exception.nested_apf) {
		vcpu->arch.apf.nested_apf_token = fault->address;
		kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
	} else {
		kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
					fault->address);
	}
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
	else
		vcpu->arch.mmu->inject_page_fault(vcpu, fault);

	return fault->nested_page_fault;
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	atomic_inc(&vcpu->arch.nmi_queued);
	kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
{
	if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
		return true;

	kvm_queue_exception(vcpu, UD_VECTOR);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_dr);

/*
 * This function will be used to read from the physical memory of the
 * currently running guest.  The difference to kvm_vcpu_read_guest_page
 * is that this function can read from guest physical or from the
 * guest's guest physical memory.
 */
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	struct x86_exception exception;
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
				      void *data, int offset, int len, u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

/*
 * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if ((pdpte[i] & PT_PRESENT_MASK) &&
		    (pdpte[i] &
		     vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_dirty);
out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);
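
/*
 * Check whether the PDPTEs cached in the vCPU differ from the ones in
 * guest memory, i.e. whether a reload is needed under PAE paging.
 */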
bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
	bool changed = true;
	int offset;
	gfn_t gfn;
	int r;

	if (!is_pae_paging(vcpu))
		return false;

	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		return true;

	gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
	offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
				       PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (r < 0)
		goto out;
	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:

	return changed;
}
EXPORT_SYMBOL_GPL(pdptrs_changed);

int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;

	cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return 1;
#endif

	cr0 &= ~CR0_RESERVED_BITS;

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
		return 1;

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
		return 1;

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu))
				return 1;
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l)
				return 1;
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
						 kvm_read_cr3(vcpu)))
			return 1;
	}

	if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
		return 1;

	kvm_x86_ops->set_cr0(vcpu, cr0);

	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
	}

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);

	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
			!vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
		vcpu->guest_xcr0_loaded = 1;
	}
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);

void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (vcpu->guest_xcr0_loaded) {
		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
		vcpu->guest_xcr0_loaded = 0;
	}
}
EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);

static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0 = xcr;
	u64 old_xcr0 = vcpu->arch.xcr0;
	u64 valid_bits;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	if (!(xcr0 & XFEATURE_MASK_FP))
		return 1;
	if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
		return 1;

	/*
	 * Do not allow the guest to set bits that we do not support
	 * saving.  However, xcr0 bit 0 is always set, even if the
	 * emulated CPU does not support XSAVE (see fx_init).
	 */
	valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
	if (xcr0 & ~valid_bits)
		return 1;

	if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
	    (!(xcr0 & XFEATURE_MASK_BNDCSR)))
		return 1;

	if (xcr0 & XFEATURE_MASK_AVX512) {
		if (!(xcr0 & XFEATURE_MASK_YMM))
			return 1;
		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
			return 1;
	}
	vcpu->arch.xcr0 = xcr0;

	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
		kvm_update_cpuid(vcpu);
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
	    __kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
				   X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;

	if (cr4 & CR4_RESERVED_BITS)
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
		return 1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
		   && ((cr4 ^ old_cr4) & pdptr_bits)
		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
				   kvm_read_cr3(vcpu)))
		return 1;

	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
		if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
			return 1;

		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
			return 1;
	}

	if (kvm_x86_ops->set_cr4(vcpu, cr4))
		return 1;

	if (((cr4 ^ old_cr4) & pdptr_bits) ||
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
		kvm_update_cpuid(vcpu);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
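
/*
 * Handle a guest write to CR3.  When PCIDs are enabled, bit 63
 * (X86_CR3_PCID_NOFLUSH) requests that the TLB not be flushed for the
 * new PCID; the bit is stripped before the address is used.
 */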
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	bool skip_tlb_flush = false;
#ifdef CONFIG_X86_64
	bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);

	if (pcid_enabled) {
		skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
		cr3 &= ~X86_CR3_PCID_NOFLUSH;
	}
#endif

	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
		if (!skip_tlb_flush) {
			kvm_mmu_sync_roots(vcpu);
			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
		}
		return 0;
	}

	if (is_long_mode(vcpu) &&
	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
		return 1;
	else if (is_pae_paging(vcpu) &&
		 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
		return 1;

	kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
	vcpu->arch.cr3 = cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS)
		return 1;
	if (lapic_in_kernel(vcpu))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
{
	int i;

	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
		for (i = 0; i < KVM_NR_DB_REGS; i++)
			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
	}
}

static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
		kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
}

static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
	unsigned long dr7;

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
		dr7 = vcpu->arch.guest_debug_dr7;
	else
		dr7 = vcpu->arch.dr7;
	kvm_x86_ops->set_dr7(vcpu, dr7);
	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
	if (dr7 & DR7_BP_EN_MASK)
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}

static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
	u64 fixed = DR6_FIXED_1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
		fixed |= DR6_RTM;
	return fixed;
}
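
/*
 * Emulate a MOV to a debug register.  DR4 and DR5 alias DR6 and DR7
 * here; the #UD case for CR4.DE=1 is handled by kvm_require_dr().
 * A non-zero return value tells the caller to inject #GP.
 */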
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	switch (dr) {
	case 0 ... 3:
		vcpu->arch.db[dr] = val;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = val;
		break;
	case 4:
		/* fall through */
	case 6:
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
		kvm_update_dr6(vcpu);
		break;
	case 5:
		/* fall through */
	default: /* 7 */
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
		kvm_update_dr7(vcpu);
		break;
	}

	return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	if (__kvm_set_dr(vcpu, dr, val)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	switch (dr) {
	case 0 ... 3:
		*val = vcpu->arch.db[dr];
		break;
	case 4:
		/* fall through */
	case 6:
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
			*val = vcpu->arch.dr6;
		else
			*val = kvm_x86_ops->get_dr6(vcpu);
		break;
	case 5:
		/* fall through */
	default: /* 7 */
		*val = vcpu->arch.dr7;
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

bool kvm_rdpmc(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_rcx_read(vcpu);
	u64 data;
	int err;

	err = kvm_pmu_rdpmc(vcpu, ecx, &data);
	if (err)
		return err;
	kvm_rax_write(vcpu, (u32)data);
	kvm_rdx_write(vcpu, data >> 32);
	return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.  This capabilities test skips MSRs that
 * are kvm-specific.  Those are put in emulated_msrs; filtering of
 * emulated_msrs may depend on host virtualization features rather than
 * host cpu features.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
	MSR_IA32_SPEC_CTRL,
	MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
	MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
	MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
	MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
	MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
	MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
};

static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
	HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
	HV_X64_MSR_RESET,
	HV_X64_MSR_VP_INDEX,
	HV_X64_MSR_VP_RUNTIME,
	HV_X64_MSR_SCONTROL,
	HV_X64_MSR_STIMER0_CONFIG,
	HV_X64_MSR_VP_ASSIST_PAGE,
	HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
	HV_X64_MSR_TSC_EMULATION_STATUS,

	MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
	MSR_KVM_PV_EOI_EN,

	MSR_IA32_TSC_ADJUST,
	MSR_IA32_TSCDEADLINE,
	MSR_IA32_ARCH_CAPABILITIES,
	MSR_IA32_MISC_ENABLE,
	MSR_IA32_MCG_STATUS,
	MSR_IA32_MCG_CTL,
	MSR_IA32_MCG_EXT_CTL,
	MSR_IA32_SMBASE,
	MSR_SMI_COUNT,
	MSR_PLATFORM_INFO,
	MSR_MISC_FEATURES_ENABLES,
	MSR_AMD64_VIRT_SPEC_CTRL,
	MSR_IA32_POWER_CTL,

	/*
	 * The following list leaves out MSRs whose values are determined
	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
	 * We always support the "true" VMX control MSRs, even if the host
	 * processor does not, so these registers are placed here rather
	 * than in msrs_to_save.
	 */
	MSR_IA32_VMX_BASIC,
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
	MSR_IA32_VMX_TRUE_EXIT_CTLS,
	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
	MSR_IA32_VMX_MISC,
	MSR_IA32_VMX_CR0_FIXED0,
	MSR_IA32_VMX_CR4_FIXED0,
	MSR_IA32_VMX_VMCS_ENUM,
	MSR_IA32_VMX_PROCBASED_CTLS2,
	MSR_IA32_VMX_EPT_VPID_CAP,
	MSR_IA32_VMX_VMFUNC,

	MSR_K7_HWCR,
	MSR_KVM_POLL_CONTROL,
};

static unsigned num_emulated_msrs;

/*
 * List of msr numbers which are used to expose MSR-based features that
 * can be used by a hypervisor to validate requested CPU features.
 */
static u32 msr_based_features[] = {
	MSR_IA32_VMX_BASIC,
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
	MSR_IA32_VMX_PINBASED_CTLS,
	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
	MSR_IA32_VMX_PROCBASED_CTLS,
	MSR_IA32_VMX_TRUE_EXIT_CTLS,
	MSR_IA32_VMX_EXIT_CTLS,
	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
	MSR_IA32_VMX_ENTRY_CTLS,
	MSR_IA32_VMX_MISC,
	MSR_IA32_VMX_CR0_FIXED0,
	MSR_IA32_VMX_CR0_FIXED1,
	MSR_IA32_VMX_CR4_FIXED0,
	MSR_IA32_VMX_CR4_FIXED1,
	MSR_IA32_VMX_VMCS_ENUM,
	MSR_IA32_VMX_PROCBASED_CTLS2,
	MSR_IA32_VMX_EPT_VPID_CAP,
	MSR_IA32_VMX_VMFUNC,

	MSR_F10H_DECFG,
	MSR_IA32_UCODE_REV,
	MSR_IA32_ARCH_CAPABILITIES,
};

static unsigned int num_msr_based_features;

static u64 kvm_get_arch_capabilities(void)
{
	u64 data = 0;

	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);

	/*
	 * If we're doing cache flushes (either "always" or "cond")
	 * we will do one whenever the guest does a vmlaunch/vmresume.
	 * If an outer hypervisor is doing the cache flush for us
	 * (VMENTER_L1D_FLUSH_NESTED_VM), it can safely pass that
	 * capability to the guest too, and if EPT is disabled we're not
	 * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
	 * require a nested hypervisor to do a flush of its own.
	 */
	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;

	return data;
}

static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
	switch (msr->index) {
	case MSR_IA32_ARCH_CAPABILITIES:
		msr->data = kvm_get_arch_capabilities();
		break;
	case MSR_IA32_UCODE_REV:
		rdmsrl_safe(msr->index, &msr->data);
		break;
	default:
		if (kvm_x86_ops->get_msr_feature(msr))
			return 1;
	}
	return 0;
}

static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct kvm_msr_entry msr;
	int r;

	msr.index = index;
	r = kvm_get_msr_feature(&msr);
	if (r)
		return r;

	*data = msr.data;

	return 0;
}

static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
		return false;

	if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
		return false;

	if (efer & (EFER_LME | EFER_LMA) &&
	    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
		return false;

	if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
		return false;

	return true;
}

bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & efer_reserved_bits)
		return false;

	return __kvm_valid_efer(vcpu, efer);
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);

static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	u64 old_efer = vcpu->arch.efer;
	u64 efer = msr_info->data;

	if (efer & efer_reserved_bits)
		return 1;

	if (!msr_info->host_initiated) {
		if (!__kvm_valid_efer(vcpu, efer))
			return 1;

		if (is_paging(vcpu) &&
		    (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
			return 1;
	}

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.efer & EFER_LMA;

	kvm_x86_ops->set_efer(vcpu, efer);

	/* Update reserved bits */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);

	return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
 * Write the MSR value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	switch (msr->index) {
	case MSR_FS_BASE:
	case MSR_GS_BASE:
	case MSR_KERNEL_GS_BASE:
	case MSR_CSTAR:
	case MSR_LSTAR:
		if (is_noncanonical_address(msr->data, vcpu))
			return 1;
		break;
	case MSR_IA32_SYSENTER_EIP:
	case MSR_IA32_SYSENTER_ESP:
		/*
		 * These MSRs hold linear addresses.  Writing a
		 * non-canonical value #GPs on Intel but is silently
		 * accepted on AMD, which ignores the top 32 bits because
		 * it does not implement 64-bit SYSENTER.  Canonicalize
		 * the value so that vmentry does not fail on Intel after
		 * a cross-vendor migration and so that something
		 * deterministic happens if the guest invokes 64-bit
		 * SYSENTER.
		 */
		msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
	}
	return kvm_x86_ops->set_msr(vcpu, msr);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct msr_data msr;
	int r;

	msr.index = index;
	msr.host_initiated = true;
	r = kvm_get_msr(vcpu, &msr);
	if (r)
		return r;

	*data = msr.data;
	return 0;
}

static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct msr_data msr;

	msr.data = *data;
	msr.index = index;
	msr.host_initiated = true;
	return kvm_set_msr(vcpu, &msr);
}

#ifdef CONFIG_X86_64
struct pvclock_gtod_data {
	seqcount_t seq;

	struct {
		int vclock_mode;
		u64 cycle_last;
		u64 mask;
		u32 mult;
		u32 shift;
	} clock;

	u64 boot_ns;
	u64 nsec_base;
	u64 wall_time_sec;
};

static struct pvclock_gtod_data pvclock_gtod_data;

static void update_pvclock_gtod(struct timekeeper *tk)
{
	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
	u64 boot_ns;

	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));

	write_seqcount_begin(&vdata->seq);

	/* copy pvclock gtod data */
	vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
	vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
	vdata->clock.mask = tk->tkr_mono.mask;
	vdata->clock.mult = tk->tkr_mono.mult;
	vdata->clock.shift = tk->tkr_mono.shift;

	vdata->boot_ns = boot_ns;
	vdata->nsec_base = tk->tkr_mono.xtime_nsec;

	vdata->wall_time_sec = tk->xtime_sec;

	write_seqcount_end(&vdata->seq);
}
#endif

void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	kvm_vcpu_kick(vcpu);
}

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec64 boot;

	if (!wall_clock)
		return;

	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
	if (r)
		return;

	if (version & 1)
		++version;	/* first time write, random junk */

	++version;

	if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
		return;

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  Guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 */
	getboottime64(&boot);

	if (kvm->arch.kvmclock_offset) {
		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
		boot = timespec64_sub(boot, ts);
	}
	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	do_shl32_div32(dividend, divisor);
	return dividend;
}
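
/*
 * Compute the (pshift, pmultiplier) pair that converts a counter
 * running at base_hz into units of scaled_hz: per the pvclock ABI, the
 * counter delta is first shifted by pshift (a negative value means a
 * right shift) and then multiplied by pmultiplier, keeping the upper
 * 32 bits of the 64-bit product.
 */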
static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
			       s8 *pshift, u32 *pmultiplier)
{
	uint64_t scaled64;
	int32_t shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = base_hz;
	scaled64 = scaled_hz;
	while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
		if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
			scaled64 >>= 1;
		else
			tps32 <<= 1;
		shift++;
	}

	*pshift = shift;
	*pmultiplier = div_frac(scaled64, tps32);
}

#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;

static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
	u64 v = (u64)khz * (1000000 + ppm);
	do_div(v, 1000000);
	return v;
}

static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
	u64 ratio;

	/* Guest TSC same frequency as host TSC? */
	if (!scale) {
		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
		return 0;
	}

	/* TSC scaling supported? */
	if (!kvm_has_tsc_control) {
		if (user_tsc_khz > tsc_khz) {
			vcpu->arch.tsc_catchup = 1;
			vcpu->arch.tsc_always_catchup = 1;
			return 0;
		} else {
			pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
			return -1;
		}
	}

	/* TSC scaling required  - calculate ratio */
	ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
				user_tsc_khz, tsc_khz);

	if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
		pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
				    user_tsc_khz);
		return -1;
	}

	vcpu->arch.tsc_scaling_ratio = ratio;
	return 0;
}

static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
{
	u32 thresh_lo, thresh_hi;
	int use_scaling = 0;

	/* tsc_khz can be zero if TSC calibration fails */
	if (user_tsc_khz == 0) {
		/* set tsc_scaling_ratio to a safe value */
		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
		return -1;
	}

	/* Compute a scale to convert nanoseconds in TSC cycles */
	kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
			   &vcpu->arch.virtual_tsc_shift,
			   &vcpu->arch.virtual_tsc_mult);
	vcpu->arch.virtual_tsc_khz = user_tsc_khz;

	/*
	 * Compute the variation in TSC rate which is acceptable
	 * within the range of tolerance and decide if the
	 * rate being applied is within that bounds of the hardware
	 * rate.  If so, no scaling or compensation need be done.
	 */
	thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
	thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
	if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
		pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
		use_scaling = 1;
	}
	return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
	u64 tsc = pvclock_scale_delta(kernel_ns - vcpu->arch.this_tsc_nsec,
				      vcpu->arch.virtual_tsc_mult,
				      vcpu->arch.virtual_tsc_shift);
	tsc += vcpu->arch.this_tsc_write;
	return tsc;
}

static inline int gtod_is_based_on_tsc(int mode)
{
	return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
}

static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	bool vcpus_matched;
	struct kvm_arch *ka = &vcpu->kvm->arch;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&vcpu->kvm->online_vcpus));

	/*
	 * Once the masterclock is enabled, always perform request in
	 * order to update it.
	 *
	 * In order to enable masterclock, the host clocksource must be TSC
	 * and the vcpus need to have matched TSCs.  When that happens,
	 * perform request to enable masterclock.
	 */
	if (ka->use_master_clock ||
	    (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
			    atomic_read(&vcpu->kvm->online_vcpus),
			    ka->use_master_clock, gtod->clock.vclock_mode);
#endif
}

static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
	u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}

/*
 * Multiplies guest_tsc by a fixed point number represented by ratio.
 *
 * The most significant 64-N bits (mult) of ratio represent the
 * integral part of the fixed point number; the remaining N bits
 * (frac) represent the fractional part, ie. ratio represents a fixed
 * point number (mult + frac * 2^(-N)).
 *
 * N equals to kvm_tsc_scaling_ratio_frac_bits.
 */
static inline u64 __scale_tsc(u64 ratio, u64 tsc)
{
	return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
}

u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
{
	u64 _tsc = tsc;
	u64 ratio = vcpu->arch.tsc_scaling_ratio;

	if (ratio != kvm_default_tsc_scaling_ratio)
		_tsc = __scale_tsc(ratio, tsc);

	return _tsc;
}
EXPORT_SYMBOL_GPL(kvm_scale_tsc);
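
/*
 * Compute the TSC offset that, when added to the host TSC scaled to
 * the guest's frequency, yields the requested guest TSC value.
 */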
static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
	u64 tsc;

	tsc = kvm_scale_tsc(vcpu, rdtsc());

	return target_tsc - tsc;
}

u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);

	return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);

static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
	vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
}

static inline bool kvm_check_tsc_unstable(void)
{
#ifdef CONFIG_X86_64
	/*
	 * TSC is marked unstable when we're running on Hyper-V,
	 * but the 'TSC page' clocksource is good.
	 */
	if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
		return false;
#endif
	return check_tsc_unstable();
}
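
/*
 * Handle a guest or host-initiated write of the guest TSC.  A write
 * that lands within roughly one second of the value expected from the
 * last write is treated as an attempt to synchronize with the other
 * vCPUs and joins the current TSC "generation"; any other write starts
 * a new generation.
 */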
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	struct kvm *kvm = vcpu->kvm;
	u64 offset, ns, elapsed;
	unsigned long flags;
	bool matched;
	bool already_matched;
	u64 data = msr->data;
	bool synchronizing = false;

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_compute_tsc_offset(vcpu, data);
	ns = ktime_get_boottime_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;

	if (vcpu->arch.virtual_tsc_khz) {
		if (data == 0 && msr->host_initiated) {
			/*
			 * detection of vcpu initialization -- need to sync
			 * with other vCPUs.  This particularly helps to keep
			 * kvm_clock stable after CPU hotplug.
			 */
			synchronizing = true;
		} else {
			u64 tsc_exp = kvm->arch.last_tsc_write +
						nsec_to_cycles(vcpu, elapsed);
			u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
			/*
			 * Special case: TSC write with a small delta (1 second)
			 * of virtual cycle time against real time is
			 * interpreted as an attempt to synchronize the CPU.
			 */
			synchronizing = data < tsc_exp + tsc_hz &&
					data + tsc_hz > tsc_exp;
		}
	}

	/*
	 * For a reliable TSC, we can match TSC offsets, and for an unstable
	 * TSC, we add elapsed time in this computation.  We could let the
	 * compensation code attempt to catch up if we fall behind, but
	 * it's better to try to match offsets from the beginning.
	 */
	if (synchronizing &&
	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
		if (!kvm_check_tsc_unstable()) {
			offset = kvm->arch.cur_tsc_offset;
		} else {
			u64 delta = nsec_to_cycles(vcpu, elapsed);
			data += delta;
			offset = kvm_compute_tsc_offset(vcpu, data);
		}
		matched = true;
		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
	} else {
		/*
		 * We split periods of matched TSC writes into generations.
		 * For each generation, we track the original measured
		 * nanosecond time, offset, and write, so if TSCs are in
		 * sync, we can match exact offset, and if not, we can match
		 * exact software computation in compute_guest_tsc().
		 *
		 * These values are tracked in kvm->arch.cur_xxx variables.
		 */
		kvm->arch.cur_tsc_generation++;
		kvm->arch.cur_tsc_nsec = ns;
		kvm->arch.cur_tsc_write = data;
		kvm->arch.cur_tsc_offset = offset;
		matched = false;
	}

	/*
	 * We also track the most recent recorded KHZ, write and time to
	 * allow the matching interval to be extended at each write.
	 */
	kvm->arch.last_tsc_nsec = ns;
	kvm->arch.last_tsc_write = data;
	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;

	vcpu->arch.last_guest_tsc = data;

	/* Keep track of which generation this VCPU has synchronized to */
	vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;

	if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
		update_ia32_tsc_adjust_msr(vcpu, offset);

	kvm_vcpu_write_tsc_offset(vcpu, offset);
	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
	if (!matched) {
		kvm->arch.nr_vcpus_matched_tsc = 0;
	} else if (!already_matched) {
		kvm->arch.nr_vcpus_matched_tsc++;
	}

	kvm_track_tsc_matching(vcpu);
	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);

static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
					   s64 adjustment)
{
	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
	kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}

static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
{
	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
		WARN_ON(adjustment < 0);
	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
	adjust_tsc_offset_guest(vcpu, adjustment);
}

#ifdef CONFIG_X86_64

static u64 read_tsc(void)
{
	u64 ret = (u64)rdtsc_ordered();
	u64 last = pvclock_gtod_data.clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
{
	long v;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	u64 tsc_pg_val;

	switch (gtod->clock.vclock_mode) {
	case VCLOCK_HVCLOCK:
		tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
						  tsc_timestamp);
		if (tsc_pg_val != U64_MAX) {
			/* TSC page valid */
			*mode = VCLOCK_HVCLOCK;
			v = (tsc_pg_val - gtod->clock.cycle_last) &
				gtod->clock.mask;
		} else {
			/* TSC page invalid */
			*mode = VCLOCK_NONE;
		}
		break;
	case VCLOCK_TSC:
		*mode = VCLOCK_TSC;
		*tsc_timestamp = read_tsc();
		v = (*tsc_timestamp - gtod->clock.cycle_last) &
			gtod->clock.mask;
		break;
	default:
		*mode = VCLOCK_NONE;
	}

	if (*mode == VCLOCK_NONE)
		*tsc_timestamp = v = 0;

	return v * gtod->clock.mult;
}

static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ns = gtod->nsec_base;
		ns += vgettsc(tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
		ns += gtod->boot_ns;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	*t = ns;

	return mode;
}

static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->nsec_base;
		ns += vgettsc(tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

/* returns true if host is using TSC based clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
{
	/* checked again under seqlock below */
	if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
		return false;

	return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
						      tsc_timestamp));
}

/* returns true if host is using TSC based clocksource */
static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
					   u64 *tsc_timestamp)
{
	/* checked again under seqlock below */
	if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
		return false;

	return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
}
#endif

/*
 * Assuming a stable TSC across physical CPUs, and a stable TSC
 * across virtual CPUs, the following condition is possible.
 * Each numbered line represents an event visible to both
 * CPUs at the next numbered event.
 *
 * "timespecX" represents host monotonic time. "tscX" represents
 * RDTSC value.
 *
 *		VCPU0 on CPU0		|	VCPU1 on CPU1
 *
 * 1. read timespec0,tsc0
 * 2.					| timespec1 = timespec0 + N
 *					| tsc1 = tsc0 + M
 * 3. transition to guest		| transition to guest
 * 4. ret0 = timespec0 + (rdtsc - tsc0)	|
 * 5.					| ret1 = timespec1 + (rdtsc - tsc1)
 *					|      = timespec0 + N + (rdtsc - (tsc0 + M))
 *
 * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity
 * we need ret0 < ret1, which reduces to M < N.  Unfortunately that is
 * not always the case: the difference between two distinct host
 * monotonic readings may be smaller than the difference between the
 * corresponding TSC reads.
 *
 * To avoid exposing distinct system_timestamp/tsc_timestamp pairs
 * simultaneously, maintain a master copy of the host monotonic time
 * and host TSC (the "master clock") and update every vcpu's pvclock
 * area from that copy, in lockstep.  Guest monotonicity then relies on
 * the synchronization of the host TSCs with the guest TSCs.
 */

static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	struct kvm_arch *ka = &kvm->arch;
	int vclock_mode;
	bool host_tsc_clocksource, vcpus_matched;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&kvm->online_vcpus));

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	host_tsc_clocksource = kvm_get_time_and_clockread(
					&ka->master_kernel_ns,
					&ka->master_cycle_now);

	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
				&& !ka->backwards_tsc_observed
				&& !ka->boot_vcpu_runs_old_kvmclock;

	if (ka->use_master_clock)
		atomic_set(&kvm_guest_has_master_clock, 1);

	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
				      vcpus_matched);
#endif
}

void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}

static void kvm_gen_update_masterclock(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	int i;
	struct kvm_vcpu *vcpu;
	struct kvm_arch *ka = &kvm->arch;

	spin_lock(&ka->pvclock_gtod_sync_lock);
	kvm_make_mclock_inprogress_request(kvm);
	/* no guest entries from this point */
	pvclock_update_vm_gtod_copy(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

	/* guest entries allowed */
	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);

	spin_unlock(&ka->pvclock_gtod_sync_lock);
#endif
}

u64 get_kvmclock_ns(struct kvm *kvm)
{
	struct kvm_arch *ka = &kvm->arch;
	struct pvclock_vcpu_time_info hv_clock;
	u64 ret;

	spin_lock(&ka->pvclock_gtod_sync_lock);
	if (!ka->use_master_clock) {
		spin_unlock(&ka->pvclock_gtod_sync_lock);
		return ktime_get_boottime_ns() + ka->kvmclock_offset;
	}

	hv_clock.tsc_timestamp = ka->master_cycle_now;
	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
	spin_unlock(&ka->pvclock_gtod_sync_lock);

	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
	get_cpu();

	if (__this_cpu_read(cpu_tsc_khz)) {
		kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
				   &hv_clock.tsc_shift,
				   &hv_clock.tsc_to_system_mul);
		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
	} else
		ret = ktime_get_boottime_ns() + ka->kvmclock_offset;

	put_cpu();

	return ret;
}
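
/*
 * Publish this vCPU's pvclock data to the guest, following the pvclock
 * ABI's seqcount-like protocol: the version field is made odd before
 * the payload is written and even afterwards, with write barriers in
 * between, so the guest never observes a torn update.
 */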
static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
{
	struct kvm_vcpu_arch *vcpu = &v->arch;
	struct pvclock_vcpu_time_info guest_hv_clock;

	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
		&guest_hv_clock, sizeof(guest_hv_clock))))
		return;

	/*
	 * This VCPU is paused, but it's legal for a guest to read another
	 * VCPU's kvmclock, so we really have to follow the specification
	 * where it says that version is odd if data is being modified,
	 * and even after it is consistent.
	 */
	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);

	if (guest_hv_clock.version & 1)
		++guest_hv_clock.version;  /* first time write, random junk */

	vcpu->hv_clock.version = guest_hv_clock.version + 1;
	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
			       &vcpu->hv_clock,
			       sizeof(vcpu->hv_clock.version));

	smp_wmb();

	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
	vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);

	if (vcpu->pvclock_set_guest_stopped_request) {
		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
		vcpu->pvclock_set_guest_stopped_request = false;
	}

	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);

	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
			       &vcpu->hv_clock,
			       sizeof(vcpu->hv_clock));

	smp_wmb();

	vcpu->hv_clock.version++;
	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
			       &vcpu->hv_clock,
			       sizeof(vcpu->hv_clock.version));
}

static int kvm_guest_time_update(struct kvm_vcpu *v)
{
	unsigned long flags, tgt_tsc_khz;
	struct kvm_vcpu_arch *vcpu = &v->arch;
	struct kvm_arch *ka = &v->kvm->arch;
	s64 kernel_ns;
	u64 tsc_timestamp, host_tsc;
	u8 pvclock_flags;
	bool use_master_clock;

	kernel_ns = 0;
	host_tsc = 0;

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	spin_lock(&ka->pvclock_gtod_sync_lock);
	use_master_clock = ka->use_master_clock;
	if (use_master_clock) {
		host_tsc = ka->master_cycle_now;
		kernel_ns = ka->master_kernel_ns;
	}
	spin_unlock(&ka->pvclock_gtod_sync_lock);

	/* Keep irq disabled to prevent changes to the clock */
	local_irq_save(flags);
	tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
	if (unlikely(tgt_tsc_khz == 0)) {
		local_irq_restore(flags);
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
		return 1;
	}
	if (!use_master_clock) {
		host_tsc = rdtsc();
		kernel_ns = ktime_get_boottime_ns();
	}

	tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);

	/*
	 * In catchup mode (the guest TSC runs at a higher frequency than
	 * the hardware can provide), advance the TSC offset so the guest
	 * TSC matches the value computed from elapsed wallclock time, but
	 * never let the guest TSC go backwards.
	 */
	if (vcpu->tsc_catchup) {
		u64 tsc = compute_guest_tsc(v, kernel_ns);
		if (tsc > tsc_timestamp) {
			adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
			tsc_timestamp = tsc;
		}
	}

	local_irq_restore(flags);

	/* With all the info we got, fill in the values */

	if (kvm_has_tsc_control)
		tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);

	if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
		kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
				   &vcpu->hv_clock.tsc_shift,
				   &vcpu->hv_clock.tsc_to_system_mul);
		vcpu->hw_tsc_khz = tgt_tsc_khz;
	}

	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
	vcpu->last_guest_tsc = tsc_timestamp;

	/* If the host uses TSC clocksource, then it is stable */
	pvclock_flags = 0;
	if (use_master_clock)
		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;

	vcpu->hv_clock.flags = pvclock_flags;

	if (vcpu->pv_time_enabled)
		kvm_setup_pvclock_page(v);
	if (v == kvm_get_vcpu(v->kvm, 0))
		kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
	return 0;
}
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2273
2274static void kvmclock_update_fn(struct work_struct *work)
2275{
2276 int i;
2277 struct delayed_work *dwork = to_delayed_work(work);
2278 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2279 kvmclock_update_work);
2280 struct kvm *kvm = container_of(ka, struct kvm, arch);
2281 struct kvm_vcpu *vcpu;
2282
2283 kvm_for_each_vcpu(i, vcpu, kvm) {
2284 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2285 kvm_vcpu_kick(vcpu);
2286 }
2287}
2288
2289static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
2290{
2291 struct kvm *kvm = v->kvm;
2292
2293 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2294 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
2295 KVMCLOCK_UPDATE_DELAY);
2296}
2297
2298#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
2299
2300static void kvmclock_sync_fn(struct work_struct *work)
2301{
2302 struct delayed_work *dwork = to_delayed_work(work);
2303 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2304 kvmclock_sync_work);
2305 struct kvm *kvm = container_of(ka, struct kvm, arch);
2306
2307 if (!kvmclock_periodic_sync)
2308 return;
2309
2310 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2311 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2312 KVMCLOCK_SYNC_PERIOD);
2313}
2314
2315
2316
2317
static bool can_set_mci_status(struct kvm_vcpu *vcpu)
{
	/* McStatusWrEn enabled? */
	if (guest_cpuid_is_amd(vcpu))
		return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
2323
2324 return false;
2325}
2326
2327static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2328{
2329 u64 mcg_cap = vcpu->arch.mcg_cap;
2330 unsigned bank_num = mcg_cap & 0xff;
2331 u32 msr = msr_info->index;
2332 u64 data = msr_info->data;
2333
2334 switch (msr) {
2335 case MSR_IA32_MCG_STATUS:
2336 vcpu->arch.mcg_status = data;
2337 break;
2338 case MSR_IA32_MCG_CTL:
2339 if (!(mcg_cap & MCG_CTL_P) &&
2340 (data || !msr_info->host_initiated))
2341 return 1;
2342 if (data != 0 && data != ~(u64)0)
2343 return 1;
2344 vcpu->arch.mcg_ctl = data;
2345 break;
2346 default:
2347 if (msr >= MSR_IA32_MC0_CTL &&
2348 msr < MSR_IA32_MCx_CTL(bank_num)) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			/*
			 * Only 0 or all 1s can be written to IA32_MCi_CTL;
			 * some Linux kernels clear bit 10 in bank 4 to
			 * work around a BIOS/GART TLB issue on AMD K8s,
			 * so ignore that bit to avoid an uncaught #GP in
			 * the guest.
			 */
			if ((offset & 0x3) == 0 &&
			    data != 0 && (data | (1 << 10)) != ~(u64)0)
				return -1;

			/* MCi_STATUS */
			if (!msr_info->host_initiated &&
			    (offset & 0x3) == 1 && data != 0) {
				if (!can_set_mci_status(vcpu))
					return -1;
			}
2365
2366 vcpu->arch.mce_banks[offset] = data;
2367 break;
2368 }
2369 return 1;
2370 }
2371 return 0;
2372}
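
/*
 * Layout note for the bank-indexed code above: each MCE bank occupies
 * four consecutive MSRs starting at MSR_IA32_MC0_CTL, so
 * (offset & 0x3) selects within a bank:
 *
 *	0 = MCi_CTL, 1 = MCi_STATUS, 2 = MCi_ADDR, 3 = MCi_MISC
 *
 * which is also why kvm_vcpu_ioctl_x86_setup_mce() later in this file
 * indexes mce_banks[bank * 4].
 */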
2373
2374static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2375{
2376 struct kvm *kvm = vcpu->kvm;
2377 int lm = is_long_mode(vcpu);
2378 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2379 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2380 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2381 : kvm->arch.xen_hvm_config.blob_size_32;
2382 u32 page_num = data & ~PAGE_MASK;
2383 u64 page_addr = data & PAGE_MASK;
2384 u8 *page;
2385 int r;
2386
2387 r = -E2BIG;
2388 if (page_num >= blob_size)
2389 goto out;
2390 r = -ENOMEM;
2391 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2392 if (IS_ERR(page)) {
2393 r = PTR_ERR(page);
2394 goto out;
2395 }
2396 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
2397 goto out_free;
2398 r = 0;
2399out_free:
2400 kfree(page);
2401out:
2402 return r;
2403}
2404
2405static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2406{
	gpa_t gpa = data & ~0x3f;

	/* Bits 3:5 are reserved, should be zero */
	if (data & 0x38)
		return 1;
2412
2413 vcpu->arch.apf.msr_val = data;
2414
2415 if (!(data & KVM_ASYNC_PF_ENABLED)) {
2416 kvm_clear_async_pf_completion_queue(vcpu);
2417 kvm_async_pf_hash_reset(vcpu);
2418 return 0;
2419 }
2420
2421 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2422 sizeof(u32)))
2423 return 1;
2424
2425 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2426 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2427 kvm_async_pf_wakeup_all(vcpu);
2428 return 0;
2429}
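
/*
 * For reference, the MSR_KVM_ASYNC_PF_EN encoding handled above: bit 0
 * enables the mechanism, bit 1 (KVM_ASYNC_PF_SEND_ALWAYS) requests
 * notifications even while the guest is in kernel mode, bit 2 requests
 * delivery as a #PF-style VM exit for nested guests, bits 3:5 are
 * reserved, and the remaining bits form the 64-byte-aligned GPA of the
 * shared u32 used for notification.
 */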
2430
2431static void kvmclock_reset(struct kvm_vcpu *vcpu)
2432{
2433 vcpu->arch.pv_time_enabled = false;
2434}
2435
2436static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
2437{
2438 ++vcpu->stat.tlb_flush;
2439 kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
2440}
2441
2442static void record_steal_time(struct kvm_vcpu *vcpu)
2443{
2444 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2445 return;
2446
2447 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2448 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2449 return;
2450
2451
2452
2453
2454
	/*
	 * Doing a TLB flush here, on the guest's behalf, can avoid
	 * expensive IPIs.
	 */
	if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
		kvm_vcpu_flush_tlb(vcpu, false);
2457
	if (vcpu->arch.st.steal.version & 1)
		vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
2460
2461 vcpu->arch.st.steal.version += 1;
2462
2463 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2464 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2465
2466 smp_wmb();
2467
2468 vcpu->arch.st.steal.steal += current->sched_info.run_delay -
2469 vcpu->arch.st.last_steal;
2470 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2471
2472 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2473 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2474
2475 smp_wmb();
2476
2477 vcpu->arch.st.steal.version += 1;
2478
2479 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2480 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2481}
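
/*
 * As with the pvclock page, steal time uses an even/odd version
 * protocol: the guest rereads the record until it observes the same
 * even version on both sides of the copy, which is why the two version
 * bumps above must bracket the payload update.
 */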
2482
2483int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2484{
2485 bool pr = false;
2486 u32 msr = msr_info->index;
2487 u64 data = msr_info->data;
2488
2489 switch (msr) {
2490 case MSR_AMD64_NB_CFG:
2491 case MSR_IA32_UCODE_WRITE:
2492 case MSR_VM_HSAVE_PA:
2493 case MSR_AMD64_PATCH_LOADER:
2494 case MSR_AMD64_BU_CFG2:
2495 case MSR_AMD64_DC_CFG:
2496 case MSR_F15H_EX_CFG:
2497 break;
2498
2499 case MSR_IA32_UCODE_REV:
2500 if (msr_info->host_initiated)
2501 vcpu->arch.microcode_version = data;
2502 break;
2503 case MSR_IA32_ARCH_CAPABILITIES:
2504 if (!msr_info->host_initiated)
2505 return 1;
2506 vcpu->arch.arch_capabilities = data;
2507 break;
2508 case MSR_EFER:
2509 return set_efer(vcpu, msr_info);
	case MSR_K7_HWCR:
		data &= ~(u64)0x40;	/* ignore flush filter disable */
		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
		data &= ~(u64)0x8;	/* ignore TLB cache disable */

		/* Handle McStatusWrEn */
		if (data == BIT_ULL(18)) {
2517 vcpu->arch.msr_hwcr = data;
2518 } else if (data != 0) {
2519 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2520 data);
2521 return 1;
2522 }
2523 break;
2524 case MSR_FAM10H_MMIO_CONF_BASE:
2525 if (data != 0) {
2526 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2527 "0x%llx\n", data);
2528 return 1;
2529 }
2530 break;
	case MSR_IA32_DEBUGCTLMSR:
		if (!data) {
			/* We support the non-activated case already */
			break;
		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/*
			 * Values other than LBR and BTF are vendor-specific,
			 * thus reserved and should throw a #GP.
			 */
			return 1;
		}
2540 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2541 __func__, data);
2542 break;
2543 case 0x200 ... 0x2ff:
2544 return kvm_mtrr_set_msr(vcpu, msr, data);
2545 case MSR_IA32_APICBASE:
2546 return kvm_set_apic_base(vcpu, msr_info);
2547 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2548 return kvm_x2apic_msr_write(vcpu, msr, data);
2549 case MSR_IA32_TSCDEADLINE:
2550 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2551 break;
2552 case MSR_IA32_TSC_ADJUST:
2553 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
2554 if (!msr_info->host_initiated) {
2555 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2556 adjust_tsc_offset_guest(vcpu, adj);
2557 }
2558 vcpu->arch.ia32_tsc_adjust_msr = data;
2559 }
2560 break;
2561 case MSR_IA32_MISC_ENABLE:
2562 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
2563 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
2564 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
2565 return 1;
2566 vcpu->arch.ia32_misc_enable_msr = data;
2567 kvm_update_cpuid(vcpu);
2568 } else {
2569 vcpu->arch.ia32_misc_enable_msr = data;
2570 }
2571 break;
2572 case MSR_IA32_SMBASE:
2573 if (!msr_info->host_initiated)
2574 return 1;
2575 vcpu->arch.smbase = data;
2576 break;
2577 case MSR_IA32_POWER_CTL:
2578 vcpu->arch.msr_ia32_power_ctl = data;
2579 break;
2580 case MSR_IA32_TSC:
2581 kvm_write_tsc(vcpu, msr_info);
2582 break;
2583 case MSR_SMI_COUNT:
2584 if (!msr_info->host_initiated)
2585 return 1;
2586 vcpu->arch.smi_count = data;
2587 break;
2588 case MSR_KVM_WALL_CLOCK_NEW:
2589 case MSR_KVM_WALL_CLOCK:
2590 vcpu->kvm->arch.wall_clock = data;
2591 kvm_write_wall_clock(vcpu->kvm, data);
2592 break;
2593 case MSR_KVM_SYSTEM_TIME_NEW:
2594 case MSR_KVM_SYSTEM_TIME: {
2595 struct kvm_arch *ka = &vcpu->kvm->arch;
2596
2597 kvmclock_reset(vcpu);
2598
2599 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2600 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2601
2602 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2603 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2604
2605 ka->boot_vcpu_runs_old_kvmclock = tmp;
2606 }
2607
2608 vcpu->arch.time = data;
2609 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2610
2611
		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;
2614
2615 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2616 &vcpu->arch.pv_time, data & ~1ULL,
2617 sizeof(struct pvclock_vcpu_time_info)))
2618 vcpu->arch.pv_time_enabled = false;
2619 else
2620 vcpu->arch.pv_time_enabled = true;
2621
2622 break;
2623 }
2624 case MSR_KVM_ASYNC_PF_EN:
2625 if (kvm_pv_enable_async_pf(vcpu, data))
2626 return 1;
2627 break;
2628 case MSR_KVM_STEAL_TIME:
2629
2630 if (unlikely(!sched_info_on()))
2631 return 1;
2632
2633 if (data & KVM_STEAL_RESERVED_MASK)
2634 return 1;
2635
2636 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2637 data & KVM_STEAL_VALID_BITS,
2638 sizeof(struct kvm_steal_time)))
2639 return 1;
2640
2641 vcpu->arch.st.msr_val = data;
2642
2643 if (!(data & KVM_MSR_ENABLED))
2644 break;
2645
2646 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2647
2648 break;
2649 case MSR_KVM_PV_EOI_EN:
2650 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
2651 return 1;
2652 break;
2653
	case MSR_KVM_POLL_CONTROL:
		/* only enable bit supported */
		if (data & (-1ULL << 1))
			return 1;
2658
2659 vcpu->arch.msr_kvm_poll_control = data;
2660 break;
2661
2662 case MSR_IA32_MCG_CTL:
2663 case MSR_IA32_MCG_STATUS:
2664 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2665 return set_msr_mce(vcpu, msr_info);
2666
	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
	case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
		pr = true;
		/* fall through */
	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
	case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2672 if (kvm_pmu_is_valid_msr(vcpu, msr))
2673 return kvm_pmu_set_msr(vcpu, msr_info);
2674
2675 if (pr || data != 0)
2676 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2677 "0x%x data 0x%llx\n", msr, data);
2678 break;
	case MSR_K7_CLK_CTL:
		/*
		 * Ignore all writes to this no longer documented MSR.
		 * Writes are only relevant for old K7 processors, all
		 * pre-dating SVM, but a recommended workaround from AMD
		 * for these chips. It is possible to specify the affected
		 * processor models on the command line, hence the need to
		 * ignore the workaround.
		 */
		break;
2689 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2690 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2691 case HV_X64_MSR_CRASH_CTL:
2692 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2693 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2694 case HV_X64_MSR_TSC_EMULATION_CONTROL:
2695 case HV_X64_MSR_TSC_EMULATION_STATUS:
2696 return kvm_hv_set_msr_common(vcpu, msr, data,
2697 msr_info->host_initiated);
	case MSR_IA32_BBL_CR_CTL3:
		/*
		 * Drop writes to this legacy MSR -- see the rdmsr
		 * counterpart for further detail.
		 */
		if (report_ignored_msrs)
			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
				    msr, data);
		break;
2706 case MSR_AMD64_OSVW_ID_LENGTH:
2707 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2708 return 1;
2709 vcpu->arch.osvw.length = data;
2710 break;
2711 case MSR_AMD64_OSVW_STATUS:
2712 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2713 return 1;
2714 vcpu->arch.osvw.status = data;
2715 break;
2716 case MSR_PLATFORM_INFO:
2717 if (!msr_info->host_initiated ||
2718 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
2719 cpuid_fault_enabled(vcpu)))
2720 return 1;
2721 vcpu->arch.msr_platform_info = data;
2722 break;
2723 case MSR_MISC_FEATURES_ENABLES:
2724 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
2725 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
2726 !supports_cpuid_fault(vcpu)))
2727 return 1;
2728 vcpu->arch.msr_misc_features_enables = data;
2729 break;
2730 default:
2731 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2732 return xen_hvm_config(vcpu, data);
2733 if (kvm_pmu_is_valid_msr(vcpu, msr))
2734 return kvm_pmu_set_msr(vcpu, msr_info);
2735 if (!ignore_msrs) {
2736 vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
2737 msr, data);
2738 return 1;
2739 } else {
2740 if (report_ignored_msrs)
2741 vcpu_unimpl(vcpu,
2742 "ignored wrmsr: 0x%x data 0x%llx\n",
2743 msr, data);
2744 break;
2745 }
2746 }
2747 return 0;
2748}
2749EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2750
2751
2752
2753
2754
2755
2756
/*
 * Reads the MSR specified by msr->index into msr->data.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2758{
2759 return kvm_x86_ops->get_msr(vcpu, msr);
2760}
2761EXPORT_SYMBOL_GPL(kvm_get_msr);
2762
2763static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
2764{
2765 u64 data;
2766 u64 mcg_cap = vcpu->arch.mcg_cap;
2767 unsigned bank_num = mcg_cap & 0xff;
2768
2769 switch (msr) {
2770 case MSR_IA32_P5_MC_ADDR:
2771 case MSR_IA32_P5_MC_TYPE:
2772 data = 0;
2773 break;
2774 case MSR_IA32_MCG_CAP:
2775 data = vcpu->arch.mcg_cap;
2776 break;
2777 case MSR_IA32_MCG_CTL:
2778 if (!(mcg_cap & MCG_CTL_P) && !host)
2779 return 1;
2780 data = vcpu->arch.mcg_ctl;
2781 break;
2782 case MSR_IA32_MCG_STATUS:
2783 data = vcpu->arch.mcg_status;
2784 break;
2785 default:
2786 if (msr >= MSR_IA32_MC0_CTL &&
2787 msr < MSR_IA32_MCx_CTL(bank_num)) {
2788 u32 offset = msr - MSR_IA32_MC0_CTL;
2789 data = vcpu->arch.mce_banks[offset];
2790 break;
2791 }
2792 return 1;
2793 }
2794 *pdata = data;
2795 return 0;
2796}
2797
2798int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2799{
2800 switch (msr_info->index) {
2801 case MSR_IA32_PLATFORM_ID:
2802 case MSR_IA32_EBL_CR_POWERON:
2803 case MSR_IA32_DEBUGCTLMSR:
2804 case MSR_IA32_LASTBRANCHFROMIP:
2805 case MSR_IA32_LASTBRANCHTOIP:
2806 case MSR_IA32_LASTINTFROMIP:
2807 case MSR_IA32_LASTINTTOIP:
2808 case MSR_K8_SYSCFG:
2809 case MSR_K8_TSEG_ADDR:
2810 case MSR_K8_TSEG_MASK:
2811 case MSR_VM_HSAVE_PA:
2812 case MSR_K8_INT_PENDING_MSG:
2813 case MSR_AMD64_NB_CFG:
2814 case MSR_FAM10H_MMIO_CONF_BASE:
2815 case MSR_AMD64_BU_CFG2:
2816 case MSR_IA32_PERF_CTL:
2817 case MSR_AMD64_DC_CFG:
2818 case MSR_F15H_EX_CFG:
2819 msr_info->data = 0;
2820 break;
2821 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
2822 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2823 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2824 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2825 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2826 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2827 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2828 msr_info->data = 0;
2829 break;
2830 case MSR_IA32_UCODE_REV:
2831 msr_info->data = vcpu->arch.microcode_version;
2832 break;
2833 case MSR_IA32_ARCH_CAPABILITIES:
2834 if (!msr_info->host_initiated &&
2835 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
2836 return 1;
2837 msr_info->data = vcpu->arch.arch_capabilities;
2838 break;
2839 case MSR_IA32_POWER_CTL:
2840 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
2841 break;
2842 case MSR_IA32_TSC:
2843 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
2844 break;
2845 case MSR_MTRRcap:
2846 case 0x200 ... 0x2ff:
2847 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
	case 0xcd: /* fsb frequency */
		msr_info->data = 3;
		break;
		/*
		 * MSR_EBC_FREQUENCY_ID
		 * Conservative value valid for even the most basic CPU
		 * models that expose this MSR; it encodes a bus frequency
		 * in bits 23:21, the exact decoding of which varies by
		 * CPU model.
		 */
	case MSR_EBC_FREQUENCY_ID:
		msr_info->data = 1 << 24;
		break;
2865 case MSR_IA32_APICBASE:
2866 msr_info->data = kvm_get_apic_base(vcpu);
2867 break;
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
2871 case MSR_IA32_TSCDEADLINE:
2872 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
2873 break;
2874 case MSR_IA32_TSC_ADJUST:
2875 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2876 break;
2877 case MSR_IA32_MISC_ENABLE:
2878 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
2879 break;
2880 case MSR_IA32_SMBASE:
2881 if (!msr_info->host_initiated)
2882 return 1;
2883 msr_info->data = vcpu->arch.smbase;
2884 break;
2885 case MSR_SMI_COUNT:
2886 msr_info->data = vcpu->arch.smi_count;
2887 break;
	case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		msr_info->data = 1000ULL;
		/* CPU multiplier */
		msr_info->data |= (((uint64_t)4ULL) << 40);
2893 break;
2894 case MSR_EFER:
2895 msr_info->data = vcpu->arch.efer;
2896 break;
2897 case MSR_KVM_WALL_CLOCK:
2898 case MSR_KVM_WALL_CLOCK_NEW:
2899 msr_info->data = vcpu->kvm->arch.wall_clock;
2900 break;
2901 case MSR_KVM_SYSTEM_TIME:
2902 case MSR_KVM_SYSTEM_TIME_NEW:
2903 msr_info->data = vcpu->arch.time;
2904 break;
2905 case MSR_KVM_ASYNC_PF_EN:
2906 msr_info->data = vcpu->arch.apf.msr_val;
2907 break;
2908 case MSR_KVM_STEAL_TIME:
2909 msr_info->data = vcpu->arch.st.msr_val;
2910 break;
2911 case MSR_KVM_PV_EOI_EN:
2912 msr_info->data = vcpu->arch.pv_eoi.msr_val;
2913 break;
2914 case MSR_KVM_POLL_CONTROL:
2915 msr_info->data = vcpu->arch.msr_kvm_poll_control;
2916 break;
2917 case MSR_IA32_P5_MC_ADDR:
2918 case MSR_IA32_P5_MC_TYPE:
2919 case MSR_IA32_MCG_CAP:
2920 case MSR_IA32_MCG_CTL:
2921 case MSR_IA32_MCG_STATUS:
2922 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2923 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
2924 msr_info->host_initiated);
	case MSR_K7_CLK_CTL:
		/*
		 * Provide the expected ramp-up count for K7 processors;
		 * the remaining fields are undefined, so always report
		 * 0x20000000.
		 */
		msr_info->data = 0x20000000;
		break;
2937 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2938 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2939 case HV_X64_MSR_CRASH_CTL:
2940 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2941 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2942 case HV_X64_MSR_TSC_EMULATION_CONTROL:
2943 case HV_X64_MSR_TSC_EMULATION_STATUS:
		return kvm_hv_get_msr_common(vcpu,
					     msr_info->index, &msr_info->data,
					     msr_info->host_initiated);
	case MSR_IA32_BBL_CR_CTL3:
		/*
		 * This legacy MSR exists but isn't fully documented in
		 * current silicon.  It is however accessed by winxp in
		 * very narrow scenarios where it sets bit #19, itself
		 * documented as a "reserved" bit.  Best effort attempt to
		 * source coherent read data here should the balance of
		 * the register be interpreted by the guest:
		 *
		 * L2 cache control register 3: 64GB range, 256KB size,
		 * enabled, latency 0x1, configured
		 */
		msr_info->data = 0xbe702111;
		break;
2961 case MSR_AMD64_OSVW_ID_LENGTH:
2962 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2963 return 1;
2964 msr_info->data = vcpu->arch.osvw.length;
2965 break;
2966 case MSR_AMD64_OSVW_STATUS:
2967 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2968 return 1;
2969 msr_info->data = vcpu->arch.osvw.status;
2970 break;
2971 case MSR_PLATFORM_INFO:
2972 if (!msr_info->host_initiated &&
2973 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
2974 return 1;
2975 msr_info->data = vcpu->arch.msr_platform_info;
2976 break;
2977 case MSR_MISC_FEATURES_ENABLES:
2978 msr_info->data = vcpu->arch.msr_misc_features_enables;
2979 break;
2980 case MSR_K7_HWCR:
2981 msr_info->data = vcpu->arch.msr_hwcr;
2982 break;
2983 default:
2984 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2985 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2986 if (!ignore_msrs) {
2987 vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
2988 msr_info->index);
2989 return 1;
2990 } else {
2991 if (report_ignored_msrs)
2992 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
2993 msr_info->index);
2994 msr_info->data = 0;
2995 }
2996 break;
2997 }
2998 return 0;
2999}
3000EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3001
3002
3003
3004
3005
3006
/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
3011{
3012 int i;
3013
3014 for (i = 0; i < msrs->nmsrs; ++i)
3015 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3016 break;
3017
3018 return i;
3019}
3020
3021
3022
3023
3024
3025
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
3030{
3031 struct kvm_msrs msrs;
3032 struct kvm_msr_entry *entries;
3033 int r, n;
3034 unsigned size;
3035
3036 r = -EFAULT;
3037 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3038 goto out;
3039
3040 r = -E2BIG;
3041 if (msrs.nmsrs >= MAX_IO_MSRS)
3042 goto out;
3043
3044 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3045 entries = memdup_user(user_msrs->entries, size);
3046 if (IS_ERR(entries)) {
3047 r = PTR_ERR(entries);
3048 goto out;
3049 }
3050
3051 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3052 if (r < 0)
3053 goto out_free;
3054
3055 r = -EFAULT;
3056 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3057 goto out_free;
3058
3059 r = n;
3060
3061out_free:
3062 kfree(entries);
3063out:
3064 return r;
3065}
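
/*
 * Purely illustrative (not kernel code): userspace reaches msr_io()
 * via the KVM_GET_MSRS/KVM_SET_MSRS vcpu ioctls with a variable-length
 * buffer, roughly:
 *
 *	struct {
 *		struct kvm_msrs hdr;
 *		struct kvm_msr_entry entries[1];
 *	} buf = { .hdr.nmsrs = 1 };
 *
 *	buf.entries[0].index = MSR_IA32_UCODE_REV;
 *	ioctl(vcpu_fd, KVM_GET_MSRS, &buf);
 *
 * The return value is the number of MSRs processed, not 0/-errno,
 * which callers of msr_io() rely on.
 */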
3066
3067static inline bool kvm_can_mwait_in_guest(void)
3068{
3069 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3070 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3071 boot_cpu_has(X86_FEATURE_ARAT);
3072}
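
/*
 * Editorial note on the checks above: if MWAIT is allowed in the
 * guest, a core may enter a deep C-state, so the APIC timer must keep
 * ticking there (X86_FEATURE_ARAT) for the in-kernel LAPIC timer to
 * stay reliable, and X86_BUG_MONITOR excludes parts whose
 * MONITOR/MWAIT does not wake up reliably.
 */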
3073
3074int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3075{
3076 int r = 0;
3077
3078 switch (ext) {
3079 case KVM_CAP_IRQCHIP:
3080 case KVM_CAP_HLT:
3081 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3082 case KVM_CAP_SET_TSS_ADDR:
3083 case KVM_CAP_EXT_CPUID:
3084 case KVM_CAP_EXT_EMUL_CPUID:
3085 case KVM_CAP_CLOCKSOURCE:
3086 case KVM_CAP_PIT:
3087 case KVM_CAP_NOP_IO_DELAY:
3088 case KVM_CAP_MP_STATE:
3089 case KVM_CAP_SYNC_MMU:
3090 case KVM_CAP_USER_NMI:
3091 case KVM_CAP_REINJECT_CONTROL:
3092 case KVM_CAP_IRQ_INJECT_STATUS:
3093 case KVM_CAP_IOEVENTFD:
3094 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3095 case KVM_CAP_PIT2:
3096 case KVM_CAP_PIT_STATE2:
3097 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3098 case KVM_CAP_XEN_HVM:
3099 case KVM_CAP_VCPU_EVENTS:
3100 case KVM_CAP_HYPERV:
3101 case KVM_CAP_HYPERV_VAPIC:
3102 case KVM_CAP_HYPERV_SPIN:
3103 case KVM_CAP_HYPERV_SYNIC:
3104 case KVM_CAP_HYPERV_SYNIC2:
3105 case KVM_CAP_HYPERV_VP_INDEX:
3106 case KVM_CAP_HYPERV_EVENTFD:
3107 case KVM_CAP_HYPERV_TLBFLUSH:
3108 case KVM_CAP_HYPERV_SEND_IPI:
3109 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
3110 case KVM_CAP_HYPERV_CPUID:
3111 case KVM_CAP_PCI_SEGMENT:
3112 case KVM_CAP_DEBUGREGS:
3113 case KVM_CAP_X86_ROBUST_SINGLESTEP:
3114 case KVM_CAP_XSAVE:
3115 case KVM_CAP_ASYNC_PF:
3116 case KVM_CAP_GET_TSC_KHZ:
3117 case KVM_CAP_KVMCLOCK_CTRL:
3118 case KVM_CAP_READONLY_MEM:
3119 case KVM_CAP_HYPERV_TIME:
3120 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
3121 case KVM_CAP_TSC_DEADLINE_TIMER:
3122 case KVM_CAP_DISABLE_QUIRKS:
3123 case KVM_CAP_SET_BOOT_CPU_ID:
3124 case KVM_CAP_SPLIT_IRQCHIP:
3125 case KVM_CAP_IMMEDIATE_EXIT:
3126 case KVM_CAP_PMU_EVENT_FILTER:
3127 case KVM_CAP_GET_MSR_FEATURES:
3128 case KVM_CAP_MSR_PLATFORM_INFO:
3129 case KVM_CAP_EXCEPTION_PAYLOAD:
3130 r = 1;
3131 break;
3132 case KVM_CAP_SYNC_REGS:
3133 r = KVM_SYNC_X86_VALID_FIELDS;
3134 break;
3135 case KVM_CAP_ADJUST_CLOCK:
3136 r = KVM_CLOCK_TSC_STABLE;
3137 break;
	case KVM_CAP_X86_DISABLE_EXITS:
		r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
		     KVM_X86_DISABLE_EXITS_CSTATE;
		if (kvm_can_mwait_in_guest())
			r |= KVM_X86_DISABLE_EXITS_MWAIT;
3143 break;
	case KVM_CAP_X86_SMM:
		/* SMBASE is usually relocated above 1M on modern chipsets,
		 * and SMM handlers might indeed rely on 4G segment limits,
		 * so do not report SMM to be available if real mode is
		 * emulated via vm86 mode.  Still, do not go to great lengths
		 * to avoid userspace's usage of the feature, because it is a
		 * fringe case that is not enabled except via specific settings
		 * of the module parameters.
		 */
		r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
3154 break;
3155 case KVM_CAP_VAPIC:
3156 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
3157 break;
3158 case KVM_CAP_NR_VCPUS:
3159 r = KVM_SOFT_MAX_VCPUS;
3160 break;
3161 case KVM_CAP_MAX_VCPUS:
3162 r = KVM_MAX_VCPUS;
3163 break;
3164 case KVM_CAP_MAX_VCPU_ID:
3165 r = KVM_MAX_VCPU_ID;
3166 break;
3167 case KVM_CAP_PV_MMU:
3168 r = 0;
3169 break;
3170 case KVM_CAP_MCE:
3171 r = KVM_MAX_MCE_BANKS;
3172 break;
3173 case KVM_CAP_XCRS:
3174 r = boot_cpu_has(X86_FEATURE_XSAVE);
3175 break;
3176 case KVM_CAP_TSC_CONTROL:
3177 r = kvm_has_tsc_control;
3178 break;
3179 case KVM_CAP_X2APIC_API:
3180 r = KVM_X2APIC_API_VALID_FLAGS;
3181 break;
3182 case KVM_CAP_NESTED_STATE:
3183 r = kvm_x86_ops->get_nested_state ?
3184 kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
3185 break;
3186 default:
3187 break;
3188 }
3189 return r;
3190
3191}
3192
3193long kvm_arch_dev_ioctl(struct file *filp,
3194 unsigned int ioctl, unsigned long arg)
3195{
3196 void __user *argp = (void __user *)arg;
3197 long r;
3198
3199 switch (ioctl) {
3200 case KVM_GET_MSR_INDEX_LIST: {
3201 struct kvm_msr_list __user *user_msr_list = argp;
3202 struct kvm_msr_list msr_list;
3203 unsigned n;
3204
3205 r = -EFAULT;
3206 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3207 goto out;
3208 n = msr_list.nmsrs;
3209 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
3210 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3211 goto out;
3212 r = -E2BIG;
3213 if (n < msr_list.nmsrs)
3214 goto out;
3215 r = -EFAULT;
3216 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3217 num_msrs_to_save * sizeof(u32)))
3218 goto out;
3219 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
3220 &emulated_msrs,
3221 num_emulated_msrs * sizeof(u32)))
3222 goto out;
3223 r = 0;
3224 break;
3225 }
3226 case KVM_GET_SUPPORTED_CPUID:
3227 case KVM_GET_EMULATED_CPUID: {
3228 struct kvm_cpuid2 __user *cpuid_arg = argp;
3229 struct kvm_cpuid2 cpuid;
3230
3231 r = -EFAULT;
3232 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3233 goto out;
3234
3235 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
3236 ioctl);
3237 if (r)
3238 goto out;
3239
3240 r = -EFAULT;
3241 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3242 goto out;
3243 r = 0;
3244 break;
3245 }
	case KVM_X86_GET_MCE_CAP_SUPPORTED:
3247 r = -EFAULT;
3248 if (copy_to_user(argp, &kvm_mce_cap_supported,
3249 sizeof(kvm_mce_cap_supported)))
3250 goto out;
3251 r = 0;
3252 break;
3253 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
3254 struct kvm_msr_list __user *user_msr_list = argp;
3255 struct kvm_msr_list msr_list;
3256 unsigned int n;
3257
3258 r = -EFAULT;
3259 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3260 goto out;
3261 n = msr_list.nmsrs;
3262 msr_list.nmsrs = num_msr_based_features;
3263 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3264 goto out;
3265 r = -E2BIG;
3266 if (n < msr_list.nmsrs)
3267 goto out;
3268 r = -EFAULT;
3269 if (copy_to_user(user_msr_list->indices, &msr_based_features,
3270 num_msr_based_features * sizeof(u32)))
3271 goto out;
3272 r = 0;
3273 break;
3274 }
3275 case KVM_GET_MSRS:
3276 r = msr_io(NULL, argp, do_get_msr_feature, 1);
3277 break;
3279 default:
3280 r = -EINVAL;
3281 }
3282out:
3283 return r;
3284}
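
/*
 * Illustrative only: KVM_GET_MSR_INDEX_LIST above is a two-call
 * protocol.  Userspace typically probes the count first, then
 * allocates and repeats:
 *
 *	struct kvm_msr_list probe = { .nmsrs = 0 };
 *	ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &probe);	// fails -E2BIG
 *	list = malloc(sizeof(*list) + probe.nmsrs * sizeof(__u32));
 *	list->nmsrs = probe.nmsrs;
 *	ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
 *
 * hence the -E2BIG path when the caller's nmsrs is too small.
 */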
3285
3286static void wbinvd_ipi(void *garbage)
3287{
3288 wbinvd();
3289}
3290
3291static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
3292{
3293 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
3294}
3295
3296void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Address WBINVD may be executed by guest */
	if (need_emulate_wbinvd(vcpu)) {
3300 if (kvm_x86_ops->has_wbinvd_exit())
3301 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
3302 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
3303 smp_call_function_single(vcpu->cpu,
3304 wbinvd_ipi, NULL, 1);
3305 }
3306
3307 kvm_x86_ops->vcpu_load(vcpu, cpu);
3308
3309 fpregs_assert_state_consistent();
3310 if (test_thread_flag(TIF_NEED_FPU_LOAD))
3311 switch_fpu_return();
3312
3313
	/* Apply any externally detected TSC adjustments (due to suspend) */
	if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
3315 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
3316 vcpu->arch.tsc_offset_adjustment = 0;
3317 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3318 }
3319
3320 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
3321 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
3322 rdtsc() - vcpu->arch.last_host_tsc;
3323 if (tsc_delta < 0)
3324 mark_tsc_unstable("KVM discovered backwards TSC");
3325
3326 if (kvm_check_tsc_unstable()) {
3327 u64 offset = kvm_compute_tsc_offset(vcpu,
3328 vcpu->arch.last_guest_tsc);
3329 kvm_vcpu_write_tsc_offset(vcpu, offset);
3330 vcpu->arch.tsc_catchup = 1;
3331 }
3332
3333 if (kvm_lapic_hv_timer_in_use(vcpu))
3334 kvm_lapic_restart_hv_timer(vcpu);
3335
3336
3337
3338
3339
		/*
		 * On a host with synchronized TSC, there is no need to update
		 * kvmclock on vcpu->cpu migration.
		 */
		if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
			kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
3342 if (vcpu->cpu != cpu)
3343 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
3344 vcpu->cpu = cpu;
3345 }
3346
3347 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3348}
3349
3350static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
3351{
3352 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3353 return;
3354
3355 vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
3356
3357 kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
3358 &vcpu->arch.st.steal.preempted,
3359 offsetof(struct kvm_steal_time, preempted),
3360 sizeof(vcpu->arch.st.steal.preempted));
3361}
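
/*
 * Note for the helper above: kvm_write_guest_offset_cached() updates
 * only the 'preempted' field of the steal-time record, without the
 * version dance used in record_steal_time(); the guest reads this
 * hint with a plain load and tolerates the race by design.
 */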
3362
3363void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3364{
3365 int idx;
3366
3367 if (vcpu->preempted)
3368 vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
3369
3370
3371
3372
3373
3374
3375
3376
3377
	/*
	 * Disable page faults because we're in atomic context here.
	 * kvm_write_guest_offset_cached() would call might_fault()
	 * that relies on pagefault_disable() to tell if there's a
	 * bug. NOTE: the write to guest memory may not go through if
	 * during postcopy live migration or if there's heavy guest
	 * paging.
	 */
	pagefault_disable();
	/*
	 * kvm_memslots() will be called by
	 * kvm_write_guest_offset_cached() so take the srcu lock.
	 */
	idx = srcu_read_lock(&vcpu->kvm->srcu);
3384 kvm_steal_time_set_preempted(vcpu);
3385 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3386 pagefault_enable();
3387 kvm_x86_ops->vcpu_put(vcpu);
3388 vcpu->arch.last_host_tsc = rdtsc();
3389
3390
3391
3392
3393
	/*
	 * The guest may have left stale bits in DR6; clear it so they
	 * cannot be confused with host debug state once this vCPU is
	 * scheduled out.
	 */
	set_debugreg(0, 6);
3395}
3396
3397static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
3398 struct kvm_lapic_state *s)
3399{
3400 if (vcpu->arch.apicv_active)
3401 kvm_x86_ops->sync_pir_to_irr(vcpu);
3402
3403 return kvm_apic_get_state(vcpu, s);
3404}
3405
3406static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
3407 struct kvm_lapic_state *s)
3408{
3409 int r;
3410
3411 r = kvm_apic_set_state(vcpu, s);
3412 if (r)
3413 return r;
3414 update_cr8_intercept(vcpu);
3415
3416 return 0;
3417}
3418
3419static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
3420{
3421 return (!lapic_in_kernel(vcpu) ||
3422 kvm_apic_accept_pic_intr(vcpu));
3423}
3424
3425
3426
3427
3428
3429
3430
/*
 * if userspace requested an interrupt window, check that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
3432{
3433 return kvm_arch_interrupt_allowed(vcpu) &&
3434 !kvm_cpu_has_interrupt(vcpu) &&
3435 !kvm_event_needs_reinjection(vcpu) &&
3436 kvm_cpu_accept_dm_intr(vcpu);
3437}
3438
3439static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
3440 struct kvm_interrupt *irq)
3441{
3442 if (irq->irq >= KVM_NR_INTERRUPTS)
3443 return -EINVAL;
3444
3445 if (!irqchip_in_kernel(vcpu->kvm)) {
3446 kvm_queue_interrupt(vcpu, irq->irq, false);
3447 kvm_make_request(KVM_REQ_EVENT, vcpu);
3448 return 0;
3449 }
3450
3451
3452
3453
3454
	/*
	 * With an in-kernel LAPIC, this path is only used to inject
	 * EXTINT, so fail for the in-kernel 8259.
	 */
	if (pic_in_kernel(vcpu->kvm))
		return -ENXIO;
3457
3458 if (vcpu->arch.pending_external_vector != -1)
3459 return -EEXIST;
3460
3461 vcpu->arch.pending_external_vector = irq->irq;
3462 kvm_make_request(KVM_REQ_EVENT, vcpu);
3463 return 0;
3464}
3465
3466static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
3467{
3468 kvm_inject_nmi(vcpu);
3469
3470 return 0;
3471}
3472
3473static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
3474{
3475 kvm_make_request(KVM_REQ_SMI, vcpu);
3476
3477 return 0;
3478}
3479
3480static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
3481 struct kvm_tpr_access_ctl *tac)
3482{
3483 if (tac->flags)
3484 return -EINVAL;
3485 vcpu->arch.tpr_access_reporting = !!tac->enabled;
3486 return 0;
3487}
3488
3489static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
3490 u64 mcg_cap)
3491{
3492 int r;
3493 unsigned bank_num = mcg_cap & 0xff, bank;
3494
3495 r = -EINVAL;
3496 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
3497 goto out;
3498 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
3499 goto out;
3500 r = 0;
3501 vcpu->arch.mcg_cap = mcg_cap;
3502
3503 if (mcg_cap & MCG_CTL_P)
3504 vcpu->arch.mcg_ctl = ~(u64)0;
3505
3506 for (bank = 0; bank < bank_num; bank++)
3507 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
3508
3509 if (kvm_x86_ops->setup_mce)
3510 kvm_x86_ops->setup_mce(vcpu);
3511out:
3512 return r;
3513}
3514
3515static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
3516 struct kvm_x86_mce *mce)
3517{
3518 u64 mcg_cap = vcpu->arch.mcg_cap;
3519 unsigned bank_num = mcg_cap & 0xff;
3520 u64 *banks = vcpu->arch.mce_banks;
3521
3522 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
3523 return -EINVAL;
3524
3525
3526
3527
	/*
	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
	 * reporting is disabled
	 */
	if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
	    vcpu->arch.mcg_ctl != ~(u64)0)
		return 0;
	banks += 4 * mce->bank;
	/*
	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
	 * reporting is disabled for the bank
	 */
	if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
		return 0;
3538 if (mce->status & MCI_STATUS_UC) {
3539 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3540 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3541 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3542 return 0;
3543 }
3544 if (banks[1] & MCI_STATUS_VAL)
3545 mce->status |= MCI_STATUS_OVER;
3546 banks[2] = mce->addr;
3547 banks[3] = mce->misc;
3548 vcpu->arch.mcg_status = mce->mcg_status;
3549 banks[1] = mce->status;
3550 kvm_queue_exception(vcpu, MC_VECTOR);
3551 } else if (!(banks[1] & MCI_STATUS_VAL)
3552 || !(banks[1] & MCI_STATUS_UC)) {
3553 if (banks[1] & MCI_STATUS_VAL)
3554 mce->status |= MCI_STATUS_OVER;
3555 banks[2] = mce->addr;
3556 banks[3] = mce->misc;
3557 banks[1] = mce->status;
3558 } else
3559 banks[1] |= MCI_STATUS_OVER;
3560 return 0;
3561}
3562
3563static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3564 struct kvm_vcpu_events *events)
3565{
	process_nmi(vcpu);

	/*
	 * The API doesn't provide the instruction length for software
	 * exceptions, so don't report them. As long as the guest RIP
	 * isn't advanced, we should expect to encounter the exception
	 * again.
	 */
	if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
3575 events->exception.injected = 0;
3576 events->exception.pending = 0;
3577 } else {
3578 events->exception.injected = vcpu->arch.exception.injected;
3579 events->exception.pending = vcpu->arch.exception.pending;
3580
3581
3582
3583
3584
		/*
		 * For ABI compatibility, deliberately conflate
		 * pending and injected exceptions when
		 * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
		 */
		if (!vcpu->kvm->arch.exception_payload_enabled)
			events->exception.injected |=
				vcpu->arch.exception.pending;
3588 }
3589 events->exception.nr = vcpu->arch.exception.nr;
3590 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
3591 events->exception.error_code = vcpu->arch.exception.error_code;
3592 events->exception_has_payload = vcpu->arch.exception.has_payload;
3593 events->exception_payload = vcpu->arch.exception.payload;
3594
3595 events->interrupt.injected =
3596 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
3597 events->interrupt.nr = vcpu->arch.interrupt.nr;
3598 events->interrupt.soft = 0;
3599 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
3600
3601 events->nmi.injected = vcpu->arch.nmi_injected;
3602 events->nmi.pending = vcpu->arch.nmi_pending != 0;
3603 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
3604 events->nmi.pad = 0;
3605
3606 events->sipi_vector = 0;
3607
3608 events->smi.smm = is_smm(vcpu);
3609 events->smi.pending = vcpu->arch.smi_pending;
3610 events->smi.smm_inside_nmi =
3611 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
3612 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
3613
3614 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
3615 | KVM_VCPUEVENT_VALID_SHADOW
3616 | KVM_VCPUEVENT_VALID_SMM);
3617 if (vcpu->kvm->arch.exception_payload_enabled)
3618 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
3619
3620 memset(&events->reserved, 0, sizeof(events->reserved));
3621}
3622
3623static void kvm_smm_changed(struct kvm_vcpu *vcpu);
3624
3625static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3626 struct kvm_vcpu_events *events)
3627{
3628 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3629 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3630 | KVM_VCPUEVENT_VALID_SHADOW
3631 | KVM_VCPUEVENT_VALID_SMM
3632 | KVM_VCPUEVENT_VALID_PAYLOAD))
3633 return -EINVAL;
3634
3635 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
3636 if (!vcpu->kvm->arch.exception_payload_enabled)
3637 return -EINVAL;
3638 if (events->exception.pending)
3639 events->exception.injected = 0;
3640 else
3641 events->exception_has_payload = 0;
3642 } else {
3643 events->exception.pending = 0;
3644 events->exception_has_payload = 0;
3645 }
3646
3647 if ((events->exception.injected || events->exception.pending) &&
3648 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
3649 return -EINVAL;
3650
3651
	/* INITs are latched while in SMM */
	if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
	    (events->smi.smm || events->smi.pending) &&
	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
		return -EINVAL;
3656
3657 process_nmi(vcpu);
3658 vcpu->arch.exception.injected = events->exception.injected;
3659 vcpu->arch.exception.pending = events->exception.pending;
3660 vcpu->arch.exception.nr = events->exception.nr;
3661 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3662 vcpu->arch.exception.error_code = events->exception.error_code;
3663 vcpu->arch.exception.has_payload = events->exception_has_payload;
3664 vcpu->arch.exception.payload = events->exception_payload;
3665
3666 vcpu->arch.interrupt.injected = events->interrupt.injected;
3667 vcpu->arch.interrupt.nr = events->interrupt.nr;
3668 vcpu->arch.interrupt.soft = events->interrupt.soft;
3669 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3670 kvm_x86_ops->set_interrupt_shadow(vcpu,
3671 events->interrupt.shadow);
3672
3673 vcpu->arch.nmi_injected = events->nmi.injected;
3674 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3675 vcpu->arch.nmi_pending = events->nmi.pending;
3676 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3677
3678 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3679 lapic_in_kernel(vcpu))
3680 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3681
3682 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
3683 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
3684 if (events->smi.smm)
3685 vcpu->arch.hflags |= HF_SMM_MASK;
3686 else
3687 vcpu->arch.hflags &= ~HF_SMM_MASK;
3688 kvm_smm_changed(vcpu);
3689 }
3690
3691 vcpu->arch.smi_pending = events->smi.pending;
3692
3693 if (events->smi.smm) {
3694 if (events->smi.smm_inside_nmi)
3695 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3696 else
3697 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3698 if (lapic_in_kernel(vcpu)) {
3699 if (events->smi.latched_init)
3700 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3701 else
3702 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3703 }
3704 }
3705 }
3706
3707 kvm_make_request(KVM_REQ_EVENT, vcpu);
3708
3709 return 0;
3710}
3711
3712static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3713 struct kvm_debugregs *dbgregs)
3714{
3715 unsigned long val;
3716
3717 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3718 kvm_get_dr(vcpu, 6, &val);
3719 dbgregs->dr6 = val;
3720 dbgregs->dr7 = vcpu->arch.dr7;
3721 dbgregs->flags = 0;
3722 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3723}
3724
3725static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3726 struct kvm_debugregs *dbgregs)
3727{
3728 if (dbgregs->flags)
3729 return -EINVAL;
3730
3731 if (dbgregs->dr6 & ~0xffffffffull)
3732 return -EINVAL;
3733 if (dbgregs->dr7 & ~0xffffffffull)
3734 return -EINVAL;
3735
3736 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3737 kvm_update_dr0123(vcpu);
3738 vcpu->arch.dr6 = dbgregs->dr6;
3739 kvm_update_dr6(vcpu);
3740 vcpu->arch.dr7 = dbgregs->dr7;
3741 kvm_update_dr7(vcpu);
3742
3743 return 0;
3744}
3745
3746#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3747
3748static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3749{
3750 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
3751 u64 xstate_bv = xsave->header.xfeatures;
3752 u64 valid;
3753
3754
3755
3756
3757
	/*
	 * Copy legacy XSAVE area, to avoid complications with CPUID
	 * leaves 0 and 1 in the loop below.
	 */
	memcpy(dest, xsave, XSAVE_HDR_OFFSET);

	/* Set XSTATE_BV */
	xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
	*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
3763
3764
3765
3766
3767
	/*
	 * Copy each region from the possibly compacted offset to the
	 * non-compacted offset.
	 */
	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3769 while (valid) {
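		/*
		 * Editorial note: (valid & -valid) below isolates the
		 * lowest set bit, so each iteration handles exactly one
		 * enabled xfeature, lowest bit first (e.g. valid ==
		 * 0b0110 visits bit 1, then bit 2).
		 */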
3770 u64 xfeature_mask = valid & -valid;
3771 int xfeature_nr = fls64(xfeature_mask) - 1;
3772 void *src = get_xsave_addr(xsave, xfeature_nr);
3773
3774 if (src) {
3775 u32 size, offset, ecx, edx;
3776 cpuid_count(XSTATE_CPUID, xfeature_nr,
3777 &size, &offset, &ecx, &edx);
3778 if (xfeature_nr == XFEATURE_PKRU)
3779 memcpy(dest + offset, &vcpu->arch.pkru,
3780 sizeof(vcpu->arch.pkru));
3781 else
3782 memcpy(dest + offset, src, size);
3783
3784 }
3785
3786 valid -= xfeature_mask;
3787 }
3788}
3789
3790static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3791{
3792 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
3793 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3794 u64 valid;
3795
3796
3797
3798
3799
	/*
	 * Copy legacy XSAVE area, to avoid complications with CPUID
	 * leaves 0 and 1 in the loop below.
	 */
	memcpy(xsave, src, XSAVE_HDR_OFFSET);

	/* Set XSTATE_BV and possibly XCOMP_BV.  */
	xsave->header.xfeatures = xstate_bv;
	if (boot_cpu_has(X86_FEATURE_XSAVES))
		xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
3806
3807
3808
3809
3810
	/*
	 * Copy each region from the non-compacted offset to the
	 * possibly compacted offset.
	 */
	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3812 while (valid) {
3813 u64 xfeature_mask = valid & -valid;
3814 int xfeature_nr = fls64(xfeature_mask) - 1;
3815 void *dest = get_xsave_addr(xsave, xfeature_nr);
3816
3817 if (dest) {
3818 u32 size, offset, ecx, edx;
3819 cpuid_count(XSTATE_CPUID, xfeature_nr,
3820 &size, &offset, &ecx, &edx);
3821 if (xfeature_nr == XFEATURE_PKRU)
3822 memcpy(&vcpu->arch.pkru, src + offset,
3823 sizeof(vcpu->arch.pkru));
3824 else
3825 memcpy(dest, src + offset, size);
3826 }
3827
3828 valid -= xfeature_mask;
3829 }
3830}
3831
3832static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3833 struct kvm_xsave *guest_xsave)
3834{
3835 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3836 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
3837 fill_xsave((u8 *) guest_xsave->region, vcpu);
3838 } else {
3839 memcpy(guest_xsave->region,
3840 &vcpu->arch.guest_fpu->state.fxsave,
3841 sizeof(struct fxregs_state));
3842 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3843 XFEATURE_MASK_FPSSE;
3844 }
3845}
3846
3847#define XSAVE_MXCSR_OFFSET 24
3848
3849static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3850 struct kvm_xsave *guest_xsave)
3851{
3852 u64 xstate_bv =
3853 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3854 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
3855
	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
		/*
		 * Here we allow setting states that are not present in
		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for
		 * compatibility with old userspace.
		 */
		if (xstate_bv & ~kvm_supported_xcr0() ||
3863 mxcsr & ~mxcsr_feature_mask)
3864 return -EINVAL;
3865 load_xsave(vcpu, (u8 *)guest_xsave->region);
3866 } else {
3867 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
3868 mxcsr & ~mxcsr_feature_mask)
3869 return -EINVAL;
3870 memcpy(&vcpu->arch.guest_fpu->state.fxsave,
3871 guest_xsave->region, sizeof(struct fxregs_state));
3872 }
3873 return 0;
3874}
3875
3876static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3877 struct kvm_xcrs *guest_xcrs)
3878{
3879 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
3880 guest_xcrs->nr_xcrs = 0;
3881 return;
3882 }
3883
3884 guest_xcrs->nr_xcrs = 1;
3885 guest_xcrs->flags = 0;
3886 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3887 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3888}
3889
3890static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3891 struct kvm_xcrs *guest_xcrs)
3892{
3893 int i, r = 0;
3894
3895 if (!boot_cpu_has(X86_FEATURE_XSAVE))
3896 return -EINVAL;
3897
3898 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3899 return -EINVAL;
3900
	for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only support XCR0 currently */
		if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3904 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3905 guest_xcrs->xcrs[i].value);
3906 break;
3907 }
3908 if (r)
3909 r = -EINVAL;
3910 return r;
3911}
3912
3913
3914
3915
3916
3917
3918
/*
 * kvm_set_guest_paused() indicates to the guest kernel that it has been
 * stopped by the hypervisor.  This function will be called from the host
 * only.  EINVAL is returned when the guest is not using pvclock.
 */
static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3920{
3921 if (!vcpu->arch.pv_time_enabled)
3922 return -EINVAL;
3923 vcpu->arch.pvclock_set_guest_stopped_request = true;
3924 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3925 return 0;
3926}
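
/*
 * Editorial note: the request set above makes the next clock update
 * publish PVCLOCK_GUEST_STOPPED in hv_clock.flags, which the guest's
 * watchdog code can consume to avoid spurious soft-lockup reports
 * after a pause (the flag handling itself lives outside this excerpt).
 */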
3927
3928static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3929 struct kvm_enable_cap *cap)
3930{
3931 int r;
3932 uint16_t vmcs_version;
3933 void __user *user_ptr;
3934
3935 if (cap->flags)
3936 return -EINVAL;
3937
3938 switch (cap->cap) {
3939 case KVM_CAP_HYPERV_SYNIC2:
3940 if (cap->args[0])
3941 return -EINVAL;
3942
3943
		/* fall through */
	case KVM_CAP_HYPERV_SYNIC:
3945 if (!irqchip_in_kernel(vcpu->kvm))
3946 return -EINVAL;
3947 return kvm_hv_activate_synic(vcpu, cap->cap ==
3948 KVM_CAP_HYPERV_SYNIC2);
3949 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
3950 if (!kvm_x86_ops->nested_enable_evmcs)
3951 return -ENOTTY;
3952 r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
3953 if (!r) {
3954 user_ptr = (void __user *)(uintptr_t)cap->args[0];
3955 if (copy_to_user(user_ptr, &vmcs_version,
3956 sizeof(vmcs_version)))
3957 r = -EFAULT;
3958 }
3959 return r;
3960
3961 default:
3962 return -EINVAL;
3963 }
3964}
3965
3966long kvm_arch_vcpu_ioctl(struct file *filp,
3967 unsigned int ioctl, unsigned long arg)
3968{
3969 struct kvm_vcpu *vcpu = filp->private_data;
3970 void __user *argp = (void __user *)arg;
3971 int r;
3972 union {
3973 struct kvm_lapic_state *lapic;
3974 struct kvm_xsave *xsave;
3975 struct kvm_xcrs *xcrs;
3976 void *buffer;
3977 } u;
3978
3979 vcpu_load(vcpu);
3980
3981 u.buffer = NULL;
3982 switch (ioctl) {
3983 case KVM_GET_LAPIC: {
3984 r = -EINVAL;
3985 if (!lapic_in_kernel(vcpu))
3986 goto out;
3987 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
3988 GFP_KERNEL_ACCOUNT);
3989
3990 r = -ENOMEM;
3991 if (!u.lapic)
3992 goto out;
3993 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3994 if (r)
3995 goto out;
3996 r = -EFAULT;
3997 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3998 goto out;
3999 r = 0;
4000 break;
4001 }
4002 case KVM_SET_LAPIC: {
4003 r = -EINVAL;
4004 if (!lapic_in_kernel(vcpu))
4005 goto out;
4006 u.lapic = memdup_user(argp, sizeof(*u.lapic));
4007 if (IS_ERR(u.lapic)) {
4008 r = PTR_ERR(u.lapic);
4009 goto out_nofree;
4010 }
4011
4012 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
4013 break;
4014 }
4015 case KVM_INTERRUPT: {
4016 struct kvm_interrupt irq;
4017
4018 r = -EFAULT;
4019 if (copy_from_user(&irq, argp, sizeof(irq)))
4020 goto out;
4021 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
4022 break;
4023 }
4024 case KVM_NMI: {
4025 r = kvm_vcpu_ioctl_nmi(vcpu);
4026 break;
4027 }
4028 case KVM_SMI: {
4029 r = kvm_vcpu_ioctl_smi(vcpu);
4030 break;
4031 }
4032 case KVM_SET_CPUID: {
4033 struct kvm_cpuid __user *cpuid_arg = argp;
4034 struct kvm_cpuid cpuid;
4035
4036 r = -EFAULT;
4037 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4038 goto out;
4039 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4040 break;
4041 }
4042 case KVM_SET_CPUID2: {
4043 struct kvm_cpuid2 __user *cpuid_arg = argp;
4044 struct kvm_cpuid2 cpuid;
4045
4046 r = -EFAULT;
4047 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4048 goto out;
4049 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
4050 cpuid_arg->entries);
4051 break;
4052 }
4053 case KVM_GET_CPUID2: {
4054 struct kvm_cpuid2 __user *cpuid_arg = argp;
4055 struct kvm_cpuid2 cpuid;
4056
4057 r = -EFAULT;
4058 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4059 goto out;
4060 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
4061 cpuid_arg->entries);
4062 if (r)
4063 goto out;
4064 r = -EFAULT;
4065 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4066 goto out;
4067 r = 0;
4068 break;
4069 }
4070 case KVM_GET_MSRS: {
4071 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4072 r = msr_io(vcpu, argp, do_get_msr, 1);
4073 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4074 break;
4075 }
4076 case KVM_SET_MSRS: {
4077 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4078 r = msr_io(vcpu, argp, do_set_msr, 0);
4079 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4080 break;
4081 }
4082 case KVM_TPR_ACCESS_REPORTING: {
4083 struct kvm_tpr_access_ctl tac;
4084
4085 r = -EFAULT;
4086 if (copy_from_user(&tac, argp, sizeof(tac)))
4087 goto out;
4088 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
4089 if (r)
4090 goto out;
4091 r = -EFAULT;
4092 if (copy_to_user(argp, &tac, sizeof(tac)))
4093 goto out;
4094 r = 0;
4095 break;
	}
4097 case KVM_SET_VAPIC_ADDR: {
4098 struct kvm_vapic_addr va;
4099 int idx;
4100
4101 r = -EINVAL;
4102 if (!lapic_in_kernel(vcpu))
4103 goto out;
4104 r = -EFAULT;
4105 if (copy_from_user(&va, argp, sizeof(va)))
4106 goto out;
4107 idx = srcu_read_lock(&vcpu->kvm->srcu);
4108 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
4109 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4110 break;
4111 }
4112 case KVM_X86_SETUP_MCE: {
4113 u64 mcg_cap;
4114
4115 r = -EFAULT;
4116 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
4117 goto out;
4118 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
4119 break;
4120 }
4121 case KVM_X86_SET_MCE: {
4122 struct kvm_x86_mce mce;
4123
4124 r = -EFAULT;
4125 if (copy_from_user(&mce, argp, sizeof(mce)))
4126 goto out;
4127 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
4128 break;
4129 }
4130 case KVM_GET_VCPU_EVENTS: {
4131 struct kvm_vcpu_events events;
4132
4133 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
4134
4135 r = -EFAULT;
4136 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
4137 break;
4138 r = 0;
4139 break;
4140 }
4141 case KVM_SET_VCPU_EVENTS: {
4142 struct kvm_vcpu_events events;
4143
4144 r = -EFAULT;
4145 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
4146 break;
4147
4148 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
4149 break;
4150 }
4151 case KVM_GET_DEBUGREGS: {
4152 struct kvm_debugregs dbgregs;
4153
4154 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
4155
4156 r = -EFAULT;
4157 if (copy_to_user(argp, &dbgregs,
4158 sizeof(struct kvm_debugregs)))
4159 break;
4160 r = 0;
4161 break;
4162 }
4163 case KVM_SET_DEBUGREGS: {
4164 struct kvm_debugregs dbgregs;
4165
4166 r = -EFAULT;
4167 if (copy_from_user(&dbgregs, argp,
4168 sizeof(struct kvm_debugregs)))
4169 break;
4170
4171 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
4172 break;
4173 }
4174 case KVM_GET_XSAVE: {
4175 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
4176 r = -ENOMEM;
4177 if (!u.xsave)
4178 break;
4179
4180 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
4181
4182 r = -EFAULT;
4183 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
4184 break;
4185 r = 0;
4186 break;
4187 }
4188 case KVM_SET_XSAVE: {
4189 u.xsave = memdup_user(argp, sizeof(*u.xsave));
4190 if (IS_ERR(u.xsave)) {
4191 r = PTR_ERR(u.xsave);
4192 goto out_nofree;
4193 }
4194
4195 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
4196 break;
4197 }
4198 case KVM_GET_XCRS: {
4199 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
4200 r = -ENOMEM;
4201 if (!u.xcrs)
4202 break;
4203
4204 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
4205
4206 r = -EFAULT;
4207 if (copy_to_user(argp, u.xcrs,
4208 sizeof(struct kvm_xcrs)))
4209 break;
4210 r = 0;
4211 break;
4212 }
4213 case KVM_SET_XCRS: {
4214 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
4215 if (IS_ERR(u.xcrs)) {
4216 r = PTR_ERR(u.xcrs);
4217 goto out_nofree;
4218 }
4219
4220 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
4221 break;
4222 }
4223 case KVM_SET_TSC_KHZ: {
4224 u32 user_tsc_khz;
4225
4226 r = -EINVAL;
4227 user_tsc_khz = (u32)arg;
4228
		/*
		 * Without TSC scaling support, kvm_max_guest_tsc_khz is
		 * zero, so the limit only applies when scaling exists.
		 */
		if (kvm_has_tsc_control &&
		    user_tsc_khz >= kvm_max_guest_tsc_khz)
			goto out;
4231
4232 if (user_tsc_khz == 0)
4233 user_tsc_khz = tsc_khz;
4234
4235 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
4236 r = 0;
4237
4238 goto out;
4239 }
4240 case KVM_GET_TSC_KHZ: {
4241 r = vcpu->arch.virtual_tsc_khz;
4242 goto out;
4243 }
4244 case KVM_KVMCLOCK_CTRL: {
4245 r = kvm_set_guest_paused(vcpu);
4246 goto out;
4247 }
4248 case KVM_ENABLE_CAP: {
4249 struct kvm_enable_cap cap;
4250
4251 r = -EFAULT;
4252 if (copy_from_user(&cap, argp, sizeof(cap)))
4253 goto out;
4254 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4255 break;
4256 }
4257 case KVM_GET_NESTED_STATE: {
4258 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4259 u32 user_data_size;
4260
4261 r = -EINVAL;
4262 if (!kvm_x86_ops->get_nested_state)
4263 break;
4264
4265 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
4266 r = -EFAULT;
4267 if (get_user(user_data_size, &user_kvm_nested_state->size))
4268 break;
4269
4270 r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
4271 user_data_size);
4272 if (r < 0)
4273 break;
4274
4275 if (r > user_data_size) {
4276 if (put_user(r, &user_kvm_nested_state->size))
4277 r = -EFAULT;
4278 else
4279 r = -E2BIG;
4280 break;
4281 }
4282
4283 r = 0;
4284 break;
4285 }
4286 case KVM_SET_NESTED_STATE: {
4287 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4288 struct kvm_nested_state kvm_state;
4289
4290 r = -EINVAL;
4291 if (!kvm_x86_ops->set_nested_state)
4292 break;
4293
4294 r = -EFAULT;
4295 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
4296 break;
4297
4298 r = -EINVAL;
4299 if (kvm_state.size < sizeof(kvm_state))
4300 break;
4301
4302 if (kvm_state.flags &
4303 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
4304 | KVM_STATE_NESTED_EVMCS))
4305 break;
4306
4307
		/* nested_run_pending implies guest_mode.  */
		if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
4309 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
4310 break;
4311
4312 r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
4313 break;
4314 }
4315 case KVM_GET_SUPPORTED_HV_CPUID: {
4316 struct kvm_cpuid2 __user *cpuid_arg = argp;
4317 struct kvm_cpuid2 cpuid;
4318
4319 r = -EFAULT;
4320 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4321 goto out;
4322
4323 r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
4324 cpuid_arg->entries);
4325 if (r)
4326 goto out;
4327
4328 r = -EFAULT;
4329 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4330 goto out;
4331 r = 0;
4332 break;
4333 }
4334 default:
4335 r = -EINVAL;
4336 }
4337out:
4338 kfree(u.buffer);
4339out_nofree:
4340 vcpu_put(vcpu);
4341 return r;
4342}
4343
4344vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4345{
4346 return VM_FAULT_SIGBUS;
4347}
4348
4349static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
4350{
4351 int ret;
4352
4353 if (addr > (unsigned int)(-3 * PAGE_SIZE))
4354 return -EINVAL;
4355 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
4356 return ret;
4357}
4358
4359static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
4360 u64 ident_addr)
4361{
4362 return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
4363}
4364
4365static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
4366 unsigned long kvm_nr_mmu_pages)
4367{
4368 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
4369 return -EINVAL;
4370
4371 mutex_lock(&kvm->slots_lock);
4372
4373 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
4374 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
4375
4376 mutex_unlock(&kvm->slots_lock);
4377 return 0;
4378}
4379
4380static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
4381{
4382 return kvm->arch.n_max_mmu_pages;
4383}
4384
4385static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4386{
4387 struct kvm_pic *pic = kvm->arch.vpic;
4388 int r;
4389
4390 r = 0;
4391 switch (chip->chip_id) {
4392 case KVM_IRQCHIP_PIC_MASTER:
4393 memcpy(&chip->chip.pic, &pic->pics[0],
4394 sizeof(struct kvm_pic_state));
4395 break;
4396 case KVM_IRQCHIP_PIC_SLAVE:
4397 memcpy(&chip->chip.pic, &pic->pics[1],
4398 sizeof(struct kvm_pic_state));
4399 break;
4400 case KVM_IRQCHIP_IOAPIC:
4401 kvm_get_ioapic(kvm, &chip->chip.ioapic);
4402 break;
4403 default:
4404 r = -EINVAL;
4405 break;
4406 }
4407 return r;
4408}
4409
4410static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4411{
4412 struct kvm_pic *pic = kvm->arch.vpic;
4413 int r;
4414
4415 r = 0;
4416 switch (chip->chip_id) {
4417 case KVM_IRQCHIP_PIC_MASTER:
4418 spin_lock(&pic->lock);
4419 memcpy(&pic->pics[0], &chip->chip.pic,
4420 sizeof(struct kvm_pic_state));
4421 spin_unlock(&pic->lock);
4422 break;
4423 case KVM_IRQCHIP_PIC_SLAVE:
4424 spin_lock(&pic->lock);
4425 memcpy(&pic->pics[1], &chip->chip.pic,
4426 sizeof(struct kvm_pic_state));
4427 spin_unlock(&pic->lock);
4428 break;
4429 case KVM_IRQCHIP_IOAPIC:
4430 kvm_set_ioapic(kvm, &chip->chip.ioapic);
4431 break;
4432 default:
4433 r = -EINVAL;
4434 break;
4435 }
4436 kvm_pic_update_irq(pic);
4437 return r;
4438}
4439
4440static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4441{
4442 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
4443
4444 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
4445
4446 mutex_lock(&kps->lock);
4447 memcpy(ps, &kps->channels, sizeof(*ps));
4448 mutex_unlock(&kps->lock);
4449 return 0;
4450}
4451
4452static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4453{
4454 int i;
4455 struct kvm_pit *pit = kvm->arch.vpit;
4456
4457 mutex_lock(&pit->pit_state.lock);
4458 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
4459 for (i = 0; i < 3; i++)
4460 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
4461 mutex_unlock(&pit->pit_state.lock);
4462 return 0;
4463}
4464
4465static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4466{
4467 mutex_lock(&kvm->arch.vpit->pit_state.lock);
4468 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
4469 sizeof(ps->channels));
4470 ps->flags = kvm->arch.vpit->pit_state.flags;
4471 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
4472 memset(&ps->reserved, 0, sizeof(ps->reserved));
4473 return 0;
4474}
4475
4476static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4477{
4478 int start = 0;
4479 int i;
4480 u32 prev_legacy, cur_legacy;
4481 struct kvm_pit *pit = kvm->arch.vpit;
4482
4483 mutex_lock(&pit->pit_state.lock);
4484 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
4485 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
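 /* Channel 0 is restarted only on a 0 -> 1 transition of HPET legacy mode. */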
4486 if (!prev_legacy && cur_legacy)
4487 start = 1;
4488 memcpy(&pit->pit_state.channels, &ps->channels,
4489 sizeof(pit->pit_state.channels));
4490 pit->pit_state.flags = ps->flags;
4491 for (i = 0; i < 3; i++)
4492 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
4493 start && i == 0);
4494 mutex_unlock(&pit->pit_state.lock);
4495 return 0;
4496}
4497
4498static int kvm_vm_ioctl_reinject(struct kvm *kvm,
4499 struct kvm_reinject_control *control)
4500{
4501 struct kvm_pit *pit = kvm->arch.vpit;
4502
4503 if (!pit)
4504 return -ENXIO;
4505
4506 /* pit->pit_state.lock was overloaded to prevent userspace from getting
4507 * an inconsistent state after running multiple KVM_REINJECT_CONTROL
4508 * ioctls in parallel. Use a separate lock if that ioctl isn't rare.
4509 */
4510 mutex_lock(&pit->pit_state.lock);
4511 kvm_pit_set_reinject(pit, control->pit_reinject);
4512 mutex_unlock(&pit->pit_state.lock);
4513
4514 return 0;
4515}
4516
4517/**
4518 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
4519 * @kvm: kvm instance
4520 * @log: slot id and address to which we copy the log
4521 *
4522 * Steps 1-4 below provide a general overview of dirty page logging. See
4523 * kvm_get_dirty_log_protect() for additional details.
4524 *
4525 * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return the
4526 * caller flushes all pages -- reprotects pages, and then sends them to
4527 * userspace:
4528 *
4529 *   1. Take a snapshot of the bit and clear it if needed.
4530 *   2. Write protect the corresponding page.
4531 *   3. Copy the snapshot to userspace.
4532 *   4. Flush TLBs if needed.
4533 *
4534 * Returns 0 on success, a negative errno otherwise.
4535 */
4536int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
4537{
4538 bool flush = false;
4539 int r;
4540
4541 mutex_lock(&kvm->slots_lock);
4542
4543 /*
4544 * Flush potentially hardware-cached dirty pages to dirty_bitmap.
4545 */
4546 if (kvm_x86_ops->flush_log_dirty)
4547 kvm_x86_ops->flush_log_dirty(kvm);
4548
4549 r = kvm_get_dirty_log_protect(kvm, log, &flush);
4550
4551 /*
4552 * All the TLBs can be flushed out of mmu lock, see the comments in
4553 * kvm_mmu_slot_remove_write_access().
4554 */
4555 lockdep_assert_held(&kvm->slots_lock);
4556 if (flush)
4557 kvm_flush_remote_tlbs(kvm);
4558
4559 mutex_unlock(&kvm->slots_lock);
4560 return r;
4561}
4562
4563int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
4564{
4565 bool flush = false;
4566 int r;
4567
4568 mutex_lock(&kvm->slots_lock);
4569
4570 /*
4571 * Flush potentially hardware-cached dirty pages to dirty_bitmap.
4572 */
4573 if (kvm_x86_ops->flush_log_dirty)
4574 kvm_x86_ops->flush_log_dirty(kvm);
4575
4576 r = kvm_clear_dirty_log_protect(kvm, log, &flush);
4577
4578 /*
4579 * All the TLBs can be flushed out of mmu lock, see the comments in
4580 * kvm_mmu_slot_remove_write_access().
4581 */
4582 lockdep_assert_held(&kvm->slots_lock);
4583 if (flush)
4584 kvm_flush_remote_tlbs(kvm);
4585
4586 mutex_unlock(&kvm->slots_lock);
4587 return r;
4588}
4589
4590int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
4591 bool line_status)
4592{
4593 if (!irqchip_in_kernel(kvm))
4594 return -ENXIO;
4595
4596 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
4597 irq_event->irq, irq_event->level,
4598 line_status);
4599 return 0;
4600}
4601
4602int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
4603 struct kvm_enable_cap *cap)
4604{
4605 int r;
4606
4607 if (cap->flags)
4608 return -EINVAL;
4609
4610 switch (cap->cap) {
4611 case KVM_CAP_DISABLE_QUIRKS:
4612 kvm->arch.disabled_quirks = cap->args[0];
4613 r = 0;
4614 break;
4615 case KVM_CAP_SPLIT_IRQCHIP: {
4616 mutex_lock(&kvm->lock);
4617 r = -EINVAL;
4618 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
4619 goto split_irqchip_unlock;
4620 r = -EEXIST;
4621 if (irqchip_in_kernel(kvm))
4622 goto split_irqchip_unlock;
4623 if (kvm->created_vcpus)
4624 goto split_irqchip_unlock;
4625 r = kvm_setup_empty_irq_routing(kvm);
4626 if (r)
4627 goto split_irqchip_unlock;
4628 /* Pairs with irqchip_in_kernel. */
4629 smp_wmb();
4630 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
4631 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
4632 r = 0;
4633split_irqchip_unlock:
4634 mutex_unlock(&kvm->lock);
4635 break;
4636 }
4637 case KVM_CAP_X2APIC_API:
4638 r = -EINVAL;
4639 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
4640 break;
4641
4642 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
4643 kvm->arch.x2apic_format = true;
4644 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
4645 kvm->arch.x2apic_broadcast_quirk_disabled = true;
4646
4647 r = 0;
4648 break;
4649 case KVM_CAP_X86_DISABLE_EXITS:
4650 r = -EINVAL;
4651 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
4652 break;
4653
4654 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
4655 kvm_can_mwait_in_guest())
4656 kvm->arch.mwait_in_guest = true;
4657 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
4658 kvm->arch.hlt_in_guest = true;
4659 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
4660 kvm->arch.pause_in_guest = true;
4661 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
4662 kvm->arch.cstate_in_guest = true;
4663 r = 0;
4664 break;
4665 case KVM_CAP_MSR_PLATFORM_INFO:
4666 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
4667 r = 0;
4668 break;
4669 case KVM_CAP_EXCEPTION_PAYLOAD:
4670 kvm->arch.exception_payload_enabled = cap->args[0];
4671 r = 0;
4672 break;
4673 default:
4674 r = -EINVAL;
4675 break;
4676 }
4677 return r;
4678}
4679
4680long kvm_arch_vm_ioctl(struct file *filp,
4681 unsigned int ioctl, unsigned long arg)
4682{
4683 struct kvm *kvm = filp->private_data;
4684 void __user *argp = (void __user *)arg;
4685 int r = -ENOTTY;
4686
4687 /*
4688 * This union makes it explicit to the compiler that the stack
4689 * usage of these variables is combined, not added together.
4690 */
4691 union {
4692 struct kvm_pit_state ps;
4693 struct kvm_pit_state2 ps2;
4694 struct kvm_pit_config pit_config;
4695 } u;
4696
4697 switch (ioctl) {
4698 case KVM_SET_TSS_ADDR:
4699 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
4700 break;
4701 case KVM_SET_IDENTITY_MAP_ADDR: {
4702 u64 ident_addr;
4703
4704 mutex_lock(&kvm->lock);
4705 r = -EINVAL;
4706 if (kvm->created_vcpus)
4707 goto set_identity_unlock;
4708 r = -EFAULT;
4709 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
4710 goto set_identity_unlock;
4711 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
4712set_identity_unlock:
4713 mutex_unlock(&kvm->lock);
4714 break;
4715 }
4716 case KVM_SET_NR_MMU_PAGES:
4717 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
4718 break;
4719 case KVM_GET_NR_MMU_PAGES:
4720 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
4721 break;
4722 case KVM_CREATE_IRQCHIP: {
4723 mutex_lock(&kvm->lock);
4724
4725 r = -EEXIST;
4726 if (irqchip_in_kernel(kvm))
4727 goto create_irqchip_unlock;
4728
4729 r = -EINVAL;
4730 if (kvm->created_vcpus)
4731 goto create_irqchip_unlock;
4732
4733 r = kvm_pic_init(kvm);
4734 if (r)
4735 goto create_irqchip_unlock;
4736
4737 r = kvm_ioapic_init(kvm);
4738 if (r) {
4739 kvm_pic_destroy(kvm);
4740 goto create_irqchip_unlock;
4741 }
4742
4743 r = kvm_setup_default_irq_routing(kvm);
4744 if (r) {
4745 kvm_ioapic_destroy(kvm);
4746 kvm_pic_destroy(kvm);
4747 goto create_irqchip_unlock;
4748 }
4749 /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
4750 smp_wmb();
4751 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
4752 create_irqchip_unlock:
4753 mutex_unlock(&kvm->lock);
4754 break;
4755 }
4756 case KVM_CREATE_PIT:
4757 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
4758 goto create_pit;
4759 case KVM_CREATE_PIT2:
4760 r = -EFAULT;
4761 if (copy_from_user(&u.pit_config, argp,
4762 sizeof(struct kvm_pit_config)))
4763 goto out;
4764 create_pit:
4765 mutex_lock(&kvm->lock);
4766 r = -EEXIST;
4767 if (kvm->arch.vpit)
4768 goto create_pit_unlock;
4769 r = -ENOMEM;
4770 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
4771 if (kvm->arch.vpit)
4772 r = 0;
4773 create_pit_unlock:
4774 mutex_unlock(&kvm->lock);
4775 break;
4776 case KVM_GET_IRQCHIP: {
4777 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
4778 struct kvm_irqchip *chip;
4779
4780 chip = memdup_user(argp, sizeof(*chip));
4781 if (IS_ERR(chip)) {
4782 r = PTR_ERR(chip);
4783 goto out;
4784 }
4785
4786 r = -ENXIO;
4787 if (!irqchip_kernel(kvm))
4788 goto get_irqchip_out;
4789 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
4790 if (r)
4791 goto get_irqchip_out;
4792 r = -EFAULT;
4793 if (copy_to_user(argp, chip, sizeof(*chip)))
4794 goto get_irqchip_out;
4795 r = 0;
4796 get_irqchip_out:
4797 kfree(chip);
4798 break;
4799 }
4800 case KVM_SET_IRQCHIP: {
4801 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
4802 struct kvm_irqchip *chip;
4803
4804 chip = memdup_user(argp, sizeof(*chip));
4805 if (IS_ERR(chip)) {
4806 r = PTR_ERR(chip);
4807 goto out;
4808 }
4809
4810 r = -ENXIO;
4811 if (!irqchip_kernel(kvm))
4812 goto set_irqchip_out;
4813 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
4814 if (r)
4815 goto set_irqchip_out;
4816 r = 0;
4817 set_irqchip_out:
4818 kfree(chip);
4819 break;
4820 }
4821 case KVM_GET_PIT: {
4822 r = -EFAULT;
4823 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
4824 goto out;
4825 r = -ENXIO;
4826 if (!kvm->arch.vpit)
4827 goto out;
4828 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
4829 if (r)
4830 goto out;
4831 r = -EFAULT;
4832 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
4833 goto out;
4834 r = 0;
4835 break;
4836 }
4837 case KVM_SET_PIT: {
4838 r = -EFAULT;
4839 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
4840 goto out;
4841 r = -ENXIO;
4842 if (!kvm->arch.vpit)
4843 goto out;
4844 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
4845 break;
4846 }
4847 case KVM_GET_PIT2: {
4848 r = -ENXIO;
4849 if (!kvm->arch.vpit)
4850 goto out;
4851 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
4852 if (r)
4853 goto out;
4854 r = -EFAULT;
4855 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
4856 goto out;
4857 r = 0;
4858 break;
4859 }
4860 case KVM_SET_PIT2: {
4861 r = -EFAULT;
4862 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
4863 goto out;
4864 r = -ENXIO;
4865 if (!kvm->arch.vpit)
4866 goto out;
4867 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
4868 break;
4869 }
4870 case KVM_REINJECT_CONTROL: {
4871 struct kvm_reinject_control control;
4872 r = -EFAULT;
4873 if (copy_from_user(&control, argp, sizeof(control)))
4874 goto out;
4875 r = kvm_vm_ioctl_reinject(kvm, &control);
4876 break;
4877 }
4878 case KVM_SET_BOOT_CPU_ID:
4879 r = 0;
4880 mutex_lock(&kvm->lock);
4881 if (kvm->created_vcpus)
4882 r = -EBUSY;
4883 else
4884 kvm->arch.bsp_vcpu_id = arg;
4885 mutex_unlock(&kvm->lock);
4886 break;
4887 case KVM_XEN_HVM_CONFIG: {
4888 struct kvm_xen_hvm_config xhc;
4889 r = -EFAULT;
4890 if (copy_from_user(&xhc, argp, sizeof(xhc)))
4891 goto out;
4892 r = -EINVAL;
4893 if (xhc.flags)
4894 goto out;
4895 memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
4896 r = 0;
4897 break;
4898 }
4899 case KVM_SET_CLOCK: {
4900 struct kvm_clock_data user_ns;
4901 u64 now_ns;
4902
4903 r = -EFAULT;
4904 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
4905 goto out;
4906
4907 r = -EINVAL;
4908 if (user_ns.flags)
4909 goto out;
4910
4911 r = 0;
4912 /*
4913 * Refresh the masterclock, sample the current kvmclock, then
4914 * bias kvmclock_offset so that get_kvmclock_ns() returns the
4915 * value userspace requested, and make every vCPU reload it.
4916 */
4917 kvm_gen_update_masterclock(kvm);
4918 now_ns = get_kvmclock_ns(kvm);
4919 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
4920 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
4921 break;
4922 }
4923 case KVM_GET_CLOCK: {
4924 struct kvm_clock_data user_ns;
4925 u64 now_ns;
4926
4927 now_ns = get_kvmclock_ns(kvm);
4928 user_ns.clock = now_ns;
4929 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
4930 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
4931
4932 r = -EFAULT;
4933 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
4934 goto out;
4935 r = 0;
4936 break;
4937 }
4938 case KVM_MEMORY_ENCRYPT_OP: {
4939 r = -ENOTTY;
4940 if (kvm_x86_ops->mem_enc_op)
4941 r = kvm_x86_ops->mem_enc_op(kvm, argp);
4942 break;
4943 }
4944 case KVM_MEMORY_ENCRYPT_REG_REGION: {
4945 struct kvm_enc_region region;
4946
4947 r = -EFAULT;
4948 if (copy_from_user(&region, argp, sizeof(region)))
4949 goto out;
4950
4951 r = -ENOTTY;
4952 if (kvm_x86_ops->mem_enc_reg_region)
4953 r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
4954 break;
4955 }
4956 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
4957 struct kvm_enc_region region;
4958
4959 r = -EFAULT;
4960 if (copy_from_user(&region, argp, sizeof(region)))
4961 goto out;
4962
4963 r = -ENOTTY;
4964 if (kvm_x86_ops->mem_enc_unreg_region)
4965 r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
4966 break;
4967 }
4968 case KVM_HYPERV_EVENTFD: {
4969 struct kvm_hyperv_eventfd hvevfd;
4970
4971 r = -EFAULT;
4972 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
4973 goto out;
4974 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
4975 break;
4976 }
4977 case KVM_SET_PMU_EVENT_FILTER:
4978 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
4979 break;
4980 default:
4981 r = -ENOTTY;
4982 }
4983out:
4984 return r;
4985}
4986
4987static void kvm_init_msr_list(void)
4988{
4989 u32 dummy[2];
4990 unsigned i, j;
4991
4992 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
4993 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
4994 continue;
4995
4996 /*
4997 * Even MSRs that are valid in the host may not be exposed
4998 * to the guests in some cases.
4999 */
5000 switch (msrs_to_save[i]) {
5001 case MSR_IA32_BNDCFGS:
5002 if (!kvm_mpx_supported())
5003 continue;
5004 break;
5005 case MSR_TSC_AUX:
5006 if (!kvm_x86_ops->rdtscp_supported())
5007 continue;
5008 break;
5009 case MSR_IA32_RTIT_CTL:
5010 case MSR_IA32_RTIT_STATUS:
5011 if (!kvm_x86_ops->pt_supported())
5012 continue;
5013 break;
5014 case MSR_IA32_RTIT_CR3_MATCH:
5015 if (!kvm_x86_ops->pt_supported() ||
5016 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
5017 continue;
5018 break;
5019 case MSR_IA32_RTIT_OUTPUT_BASE:
5020 case MSR_IA32_RTIT_OUTPUT_MASK:
5021 if (!kvm_x86_ops->pt_supported() ||
5022 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
5023 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
5024 continue;
5025 break;
5026 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
5027 if (!kvm_x86_ops->pt_supported() ||
5028 msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
5029 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
5030 continue;
5031 break;
5032 }
5033 default:
5034 break;
5035 }
5036
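 /* MSR is supported: compact it down over any filtered-out entries. */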
5037 if (j < i)
5038 msrs_to_save[j] = msrs_to_save[i];
5039 j++;
5040 }
5041 num_msrs_to_save = j;
5042
5043 for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
5044 if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
5045 continue;
5046
5047 if (j < i)
5048 emulated_msrs[j] = emulated_msrs[i];
5049 j++;
5050 }
5051 num_emulated_msrs = j;
5052
5053 for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
5054 struct kvm_msr_entry msr;
5055
5056 msr.index = msr_based_features[i];
5057 if (kvm_get_msr_feature(&msr))
5058 continue;
5059
5060 if (j < i)
5061 msr_based_features[j] = msr_based_features[i];
5062 j++;
5063 }
5064 num_msr_based_features = j;
5065}
5066
5067static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
5068 const void *v)
5069{
5070 int handled = 0;
5071 int n;
5072
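 /*
  * Handle up to 8 bytes at a time, trying the in-kernel local APIC
  * first and falling back to the MMIO bus; stop at the first chunk
  * nobody claims and report how much was handled.
  */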
5073 do {
5074 n = min(len, 8);
5075 if (!(lapic_in_kernel(vcpu) &&
5076 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
5077 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
5078 break;
5079 handled += n;
5080 addr += n;
5081 len -= n;
5082 v += n;
5083 } while (len);
5084
5085 return handled;
5086}
5087
5088static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
5089{
5090 int handled = 0;
5091 int n;
5092
5093 do {
5094 n = min(len, 8);
5095 if (!(lapic_in_kernel(vcpu) &&
5096 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
5097 addr, n, v))
5098 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
5099 break;
5100 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
5101 handled += n;
5102 addr += n;
5103 len -= n;
5104 v += n;
5105 } while (len);
5106
5107 return handled;
5108}
5109
5110static void kvm_set_segment(struct kvm_vcpu *vcpu,
5111 struct kvm_segment *var, int seg)
5112{
5113 kvm_x86_ops->set_segment(vcpu, var, seg);
5114}
5115
5116void kvm_get_segment(struct kvm_vcpu *vcpu,
5117 struct kvm_segment *var, int seg)
5118{
5119 kvm_x86_ops->get_segment(vcpu, var, seg);
5120}
5121
5122gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
5123 struct x86_exception *exception)
5124{
5125 gpa_t t_gpa;
5126
5127 BUG_ON(!mmu_is_nested(vcpu));
5128
5129 /* NPT walks are always user-mode walks */
5130 access |= PFERR_USER_MASK;
5131 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
5132
5133 return t_gpa;
5134}
5135
5136gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
5137 struct x86_exception *exception)
5138{
5139 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5140 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5141}
5142
5143gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
5144 struct x86_exception *exception)
5145{
5146 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5147 access |= PFERR_FETCH_MASK;
5148 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5149}
5150
5151gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
5152 struct x86_exception *exception)
5153{
5154 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5155 access |= PFERR_WRITE_MASK;
5156 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5157}
5158
5159/* walks the guest page tables as an implicit supervisor access */
5160gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
5161 struct x86_exception *exception)
5162{
5163 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
5164}
5165
5166static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5167 struct kvm_vcpu *vcpu, u32 access,
5168 struct x86_exception *exception)
5169{
5170 void *data = val;
5171 int r = X86EMUL_CONTINUE;
5172
5173 while (bytes) {
5174 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
5175 exception);
5176 unsigned offset = addr & (PAGE_SIZE-1);
5177 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
5178 int ret;
5179
5180 if (gpa == UNMAPPED_GVA)
5181 return X86EMUL_PROPAGATE_FAULT;
5182 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
5183 offset, toread);
5184 if (ret < 0) {
5185 r = X86EMUL_IO_NEEDED;
5186 goto out;
5187 }
5188
5189 bytes -= toread;
5190 data += toread;
5191 addr += toread;
5192 }
5193out:
5194 return r;
5195}
5196
5197/* used for instruction fetching */
5198static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
5199 gva_t addr, void *val, unsigned int bytes,
5200 struct x86_exception *exception)
5201{
5202 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5203 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5204 unsigned offset;
5205 int ret;
5206
5207 /* Inline kvm_read_guest_virt_helper for speed. */
5208 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
5209 exception);
5210 if (unlikely(gpa == UNMAPPED_GVA))
5211 return X86EMUL_PROPAGATE_FAULT;
5212
5213 offset = addr & (PAGE_SIZE-1);
5214 if (WARN_ON(offset + bytes > PAGE_SIZE))
5215 bytes = (unsigned)PAGE_SIZE - offset;
5216 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
5217 offset, bytes);
5218 if (unlikely(ret < 0))
5219 return X86EMUL_IO_NEEDED;
5220
5221 return X86EMUL_CONTINUE;
5222}
5223
5224int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
5225 gva_t addr, void *val, unsigned int bytes,
5226 struct x86_exception *exception)
5227{
5228 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5229
5230 /*
5231 * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
5232 * is returned, but our callers are not ready for that and blindly call
5233 * kvm_inject_page_fault. Ensure they at least do not leak kernel stack
5234 * memory into the guest whatever way the X86EMUL_* value is handled.
5235 */
5236 memset(exception, 0, sizeof(*exception));
5237 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
5238 exception);
5239}
5240EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
5241
5242static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
5243 gva_t addr, void *val, unsigned int bytes,
5244 struct x86_exception *exception, bool system)
5245{
5246 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5247 u32 access = 0;
5248
5249 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
5250 access |= PFERR_USER_MASK;
5251
5252 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
5253}
5254
5255static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
5256 unsigned long addr, void *val, unsigned int bytes)
5257{
5258 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5259 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
5260
5261 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
5262}
5263
5264static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5265 struct kvm_vcpu *vcpu, u32 access,
5266 struct x86_exception *exception)
5267{
5268 void *data = val;
5269 int r = X86EMUL_CONTINUE;
5270
5271 while (bytes) {
5272 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
5273 access,
5274 exception);
5275 unsigned offset = addr & (PAGE_SIZE-1);
5276 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
5277 int ret;
5278
5279 if (gpa == UNMAPPED_GVA)
5280 return X86EMUL_PROPAGATE_FAULT;
5281 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
5282 if (ret < 0) {
5283 r = X86EMUL_IO_NEEDED;
5284 goto out;
5285 }
5286
5287 bytes -= towrite;
5288 data += towrite;
5289 addr += towrite;
5290 }
5291out:
5292 return r;
5293}
5294
5295static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
5296 unsigned int bytes, struct x86_exception *exception,
5297 bool system)
5298{
5299 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5300 u32 access = PFERR_WRITE_MASK;
5301
5302 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
5303 access |= PFERR_USER_MASK;
5304
5305 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5306 access, exception);
5307}
5308
5309int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
5310 unsigned int bytes, struct x86_exception *exception)
5311{
5312 /* kvm_write_guest_virt_system can pull in tons of pages. */
5313 vcpu->arch.l1tf_flush_l1d = true;
5314
5315 /*
5316 * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
5317 * is returned, but our callers are not ready for that and blindly call
5318 * kvm_inject_page_fault. Ensure they at least do not leak kernel stack
5319 * memory into the guest whatever way the X86EMUL_* value is handled.
5320 */
5321 memset(exception, 0, sizeof(*exception));
5322 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5323 PFERR_WRITE_MASK, exception);
5324}
5325EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
5326
5327int handle_ud(struct kvm_vcpu *vcpu)
5328{
5329 int emul_type = EMULTYPE_TRAP_UD;
5330 enum emulation_result er;
5331 char sig[5]; /* ud2; .ascii "kvm" */
5332 struct x86_exception e;
5333
5334 if (force_emulation_prefix &&
5335 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
5336 sig, sizeof(sig), &e) == 0 &&
5337 memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
5338 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
5339 emul_type = 0;
5340 }
5341
5342 er = kvm_emulate_instruction(vcpu, emul_type);
5343 if (er == EMULATE_USER_EXIT)
5344 return 0;
5345 if (er != EMULATE_DONE)
5346 kvm_queue_exception(vcpu, UD_VECTOR);
5347 return 1;
5348}
5349EXPORT_SYMBOL_GPL(handle_ud);
5350
5351static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5352 gpa_t gpa, bool write)
5353{
5354 /* For APIC access vmexit */
5355 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5356 return 1;
5357
5358 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
5359 trace_vcpu_match_mmio(gva, gpa, write, true);
5360 return 1;
5361 }
5362
5363 return 0;
5364}
5365
5366static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5367 gpa_t *gpa, struct x86_exception *exception,
5368 bool write)
5369{
5370 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
5371 | (write ? PFERR_WRITE_MASK : 0);
5372
5373 /*
5374 * Currently PKRU is only applied to EPT-enabled guests, so
5375 * there is no pkey in the EPT page table for an L1 guest or the
5376 * EPT shadow page table for an L2 guest.
5377 */
5378 if (vcpu_match_mmio_gva(vcpu, gva)
5379 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
5380 vcpu->arch.access, 0, access)) {
5381 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
5382 (gva & (PAGE_SIZE - 1));
5383 trace_vcpu_match_mmio(gva, *gpa, write, false);
5384 return 1;
5385 }
5386
5387 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5388
5389 if (*gpa == UNMAPPED_GVA)
5390 return -1;
5391
5392 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
5393}
5394
5395int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
5396 const void *val, int bytes)
5397{
5398 int ret;
5399
5400 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
5401 if (ret < 0)
5402 return 0;
5403 kvm_page_track_write(vcpu, gpa, val, bytes);
5404 return 1;
5405}
5406
5407struct read_write_emulator_ops {
5408 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
5409 int bytes);
5410 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
5411 void *val, int bytes);
5412 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5413 int bytes, void *val);
5414 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5415 void *val, int bytes);
5416 bool write;
5417};
5418
5419static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
5420{
5421 if (vcpu->mmio_read_completed) {
5422 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
5423 vcpu->mmio_fragments[0].gpa, val);
5424 vcpu->mmio_read_completed = 0;
5425 return 1;
5426 }
5427
5428 return 0;
5429}
5430
5431static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5432 void *val, int bytes)
5433{
5434 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
5435}
5436
5437static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5438 void *val, int bytes)
5439{
5440 return emulator_write_phys(vcpu, gpa, val, bytes);
5441}
5442
5443static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
5444{
5445 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
5446 return vcpu_mmio_write(vcpu, gpa, bytes, val);
5447}
5448
5449static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5450 void *val, int bytes)
5451{
5452 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
5453 return X86EMUL_IO_NEEDED;
5454}
5455
5456static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5457 void *val, int bytes)
5458{
5459 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
5460
5461 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
5462 return X86EMUL_CONTINUE;
5463}
5464
5465static const struct read_write_emulator_ops read_emultor = {
5466 .read_write_prepare = read_prepare,
5467 .read_write_emulate = read_emulate,
5468 .read_write_mmio = vcpu_mmio_read,
5469 .read_write_exit_mmio = read_exit_mmio,
5470};
5471
5472static const struct read_write_emulator_ops write_emultor = {
5473 .read_write_emulate = write_emulate,
5474 .read_write_mmio = write_mmio,
5475 .read_write_exit_mmio = write_exit_mmio,
5476 .write = true,
5477};
5478
5479static int emulator_read_write_onepage(unsigned long addr, void *val,
5480 unsigned int bytes,
5481 struct x86_exception *exception,
5482 struct kvm_vcpu *vcpu,
5483 const struct read_write_emulator_ops *ops)
5484{
5485 gpa_t gpa;
5486 int handled, ret;
5487 bool write = ops->write;
5488 struct kvm_mmio_fragment *frag;
5489 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5490
5491 /*
5492 * If the exit was due to a NPF we may already have a GPA.
5493 * If the GPA is present, use it to avoid the GVA to GPA table walk.
5494 * Note, this cannot be used on string operations since string
5495 * operation using rep will only have the initial GPA from the NPF
5496 * occurred.
5497 */
5498 if (vcpu->arch.gpa_available &&
5499 emulator_can_use_gpa(ctxt) &&
5500 (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
5501 gpa = vcpu->arch.gpa_val;
5502 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
5503 } else {
5504 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
5505 if (ret < 0)
5506 return X86EMUL_PROPAGATE_FAULT;
5507 }
5508
5509 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
5510 return X86EMUL_CONTINUE;
5511
5512 /*
5513 * Is this MMIO handled locally?
5514 */
5515 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
5516 if (handled == bytes)
5517 return X86EMUL_CONTINUE;
5518
5519 gpa += handled;
5520 bytes -= handled;
5521 val += handled;
5522
5523 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
5524 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
5525 frag->gpa = gpa;
5526 frag->data = val;
5527 frag->len = bytes;
5528 return X86EMUL_CONTINUE;
5529}
5530
5531static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
5532 unsigned long addr,
5533 void *val, unsigned int bytes,
5534 struct x86_exception *exception,
5535 const struct read_write_emulator_ops *ops)
5536{
5537 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5538 gpa_t gpa;
5539 int rc;
5540
5541 if (ops->read_write_prepare &&
5542 ops->read_write_prepare(vcpu, val, bytes))
5543 return X86EMUL_CONTINUE;
5544
5545 vcpu->mmio_nr_fragments = 0;
5546
5547 /* Crossing a page boundary? */
5548 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
5549 int now;
5550
5551 now = -addr & ~PAGE_MASK;
5552 rc = emulator_read_write_onepage(addr, val, now, exception,
5553 vcpu, ops);
5554
5555 if (rc != X86EMUL_CONTINUE)
5556 return rc;
5557 addr += now;
5558 if (ctxt->mode != X86EMUL_MODE_PROT64)
5559 addr = (u32)addr;
5560 val += now;
5561 bytes -= now;
5562 }
5563
5564 rc = emulator_read_write_onepage(addr, val, bytes, exception,
5565 vcpu, ops);
5566 if (rc != X86EMUL_CONTINUE)
5567 return rc;
5568
5569 if (!vcpu->mmio_nr_fragments)
5570 return rc;
5571
5572 gpa = vcpu->mmio_fragments[0].gpa;
5573
5574 vcpu->mmio_needed = 1;
5575 vcpu->mmio_cur_fragment = 0;
5576
5577 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
5578 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
5579 vcpu->run->exit_reason = KVM_EXIT_MMIO;
5580 vcpu->run->mmio.phys_addr = gpa;
5581
5582 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
5583}
5584
5585static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
5586 unsigned long addr,
5587 void *val,
5588 unsigned int bytes,
5589 struct x86_exception *exception)
5590{
5591 return emulator_read_write(ctxt, addr, val, bytes,
5592 exception, &read_emultor);
5593}
5594
5595static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
5596 unsigned long addr,
5597 const void *val,
5598 unsigned int bytes,
5599 struct x86_exception *exception)
5600{
5601 return emulator_read_write(ctxt, addr, (void *)val, bytes,
5602 exception, &write_emultor);
5603}
5604
5605#define CMPXCHG_TYPE(t, ptr, old, new) \
5606 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
5607
5608#ifdef CONFIG_X86_64
5609# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
5610#else
5611# define CMPXCHG64(ptr, old, new) \
5612 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
5613#endif
5614
5615static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
5616 unsigned long addr,
5617 const void *old,
5618 const void *new,
5619 unsigned int bytes,
5620 struct x86_exception *exception)
5621{
5622 struct kvm_host_map map;
5623 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5624 gpa_t gpa;
5625 char *kaddr;
5626 bool exchanged;
5627
5628 /* guests cmpxchg8b have to be emulated atomically */
5629 if (bytes > 8 || (bytes & (bytes - 1)))
5630 goto emul_write;
5631
5632 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
5633
5634 if (gpa == UNMAPPED_GVA ||
5635 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5636 goto emul_write;
5637
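 /* A cmpxchg that crosses a page boundary can't be mapped atomically. */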
5638 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
5639 goto emul_write;
5640
5641 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
5642 goto emul_write;
5643
5644 kaddr = map.hva + offset_in_page(gpa);
5645
5646 switch (bytes) {
5647 case 1:
5648 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
5649 break;
5650 case 2:
5651 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
5652 break;
5653 case 4:
5654 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
5655 break;
5656 case 8:
5657 exchanged = CMPXCHG64(kaddr, old, new);
5658 break;
5659 default:
5660 BUG();
5661 }
5662
5663 kvm_vcpu_unmap(vcpu, &map, true);
5664
5665 if (!exchanged)
5666 return X86EMUL_CMPXCHG_FAILED;
5667
5668 kvm_page_track_write(vcpu, gpa, new, bytes);
5669
5670 return X86EMUL_CONTINUE;
5671
5672emul_write:
5673 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
5674
5675 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
5676}
5677
5678static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
5679{
5680 int r = 0, i;
5681
5682 for (i = 0; i < vcpu->arch.pio.count; i++) {
5683 if (vcpu->arch.pio.in)
5684 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
5685 vcpu->arch.pio.size, pd);
5686 else
5687 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
5688 vcpu->arch.pio.port, vcpu->arch.pio.size,
5689 pd);
5690 if (r)
5691 break;
5692 pd += vcpu->arch.pio.size;
5693 }
5694 return r;
5695}
5696
5697static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
5698 unsigned short port, void *val,
5699 unsigned int count, bool in)
5700{
5701 vcpu->arch.pio.port = port;
5702 vcpu->arch.pio.in = in;
5703 vcpu->arch.pio.count = count;
5704 vcpu->arch.pio.size = size;
5705
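 /*
  * If an in-kernel device claims the port, the PIO completes here;
  * otherwise fill out vcpu->run and exit to userspace with KVM_EXIT_IO.
  */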
5706 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
5707 vcpu->arch.pio.count = 0;
5708 return 1;
5709 }
5710
5711 vcpu->run->exit_reason = KVM_EXIT_IO;
5712 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
5713 vcpu->run->io.size = size;
5714 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
5715 vcpu->run->io.count = count;
5716 vcpu->run->io.port = port;
5717
5718 return 0;
5719}
5720
5721static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
5722 int size, unsigned short port, void *val,
5723 unsigned int count)
5724{
5725 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5726 int ret;
5727
5728 if (vcpu->arch.pio.count)
5729 goto data_avail;
5730
5731 memset(vcpu->arch.pio_data, 0, size * count);
5732
5733 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
5734 if (ret) {
5735data_avail:
5736 memcpy(val, vcpu->arch.pio_data, size * count);
5737 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
5738 vcpu->arch.pio.count = 0;
5739 return 1;
5740 }
5741
5742 return 0;
5743}
5744
5745static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
5746 int size, unsigned short port,
5747 const void *val, unsigned int count)
5748{
5749 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5750
5751 memcpy(vcpu->arch.pio_data, val, size * count);
5752 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
5753 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
5754}
5755
5756static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
5757{
5758 return kvm_x86_ops->get_segment_base(vcpu, seg);
5759}
5760
5761static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
5762{
5763 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
5764}
5765
5766static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
5767{
5768 if (!need_emulate_wbinvd(vcpu))
5769 return X86EMUL_CONTINUE;
5770
5771 if (kvm_x86_ops->has_wbinvd_exit()) {
5772 int cpu = get_cpu();
5773
5774 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
5775 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
5776 wbinvd_ipi, NULL, 1);
5777 put_cpu();
5778 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
5779 } else
5780 wbinvd();
5781 return X86EMUL_CONTINUE;
5782}
5783
5784int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
5785{
5786 kvm_emulate_wbinvd_noskip(vcpu);
5787 return kvm_skip_emulated_instruction(vcpu);
5788}
5789EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
5790
5791
5792
5793static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
5794{
5795 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
5796}
5797
5798static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
5799 unsigned long *dest)
5800{
5801 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
5802}
5803
5804static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
5805 unsigned long value)
5806{
5807
5808 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
5809}
5810
5811static u64 mk_cr_64(u64 curr_cr, u32 new_val)
5812{
5813 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
5814}
5815
5816static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
5817{
5818 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5819 unsigned long value;
5820
5821 switch (cr) {
5822 case 0:
5823 value = kvm_read_cr0(vcpu);
5824 break;
5825 case 2:
5826 value = vcpu->arch.cr2;
5827 break;
5828 case 3:
5829 value = kvm_read_cr3(vcpu);
5830 break;
5831 case 4:
5832 value = kvm_read_cr4(vcpu);
5833 break;
5834 case 8:
5835 value = kvm_get_cr8(vcpu);
5836 break;
5837 default:
5838 kvm_err("%s: unexpected cr %u\n", __func__, cr);
5839 return 0;
5840 }
5841
5842 return value;
5843}
5844
5845static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
5846{
5847 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5848 int res = 0;
5849
5850 switch (cr) {
5851 case 0:
5852 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
5853 break;
5854 case 2:
5855 vcpu->arch.cr2 = val;
5856 break;
5857 case 3:
5858 res = kvm_set_cr3(vcpu, val);
5859 break;
5860 case 4:
5861 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
5862 break;
5863 case 8:
5864 res = kvm_set_cr8(vcpu, val);
5865 break;
5866 default:
5867 kvm_err("%s: unexpected cr %u\n", __func__, cr);
5868 res = -1;
5869 }
5870
5871 return res;
5872}
5873
5874static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
5875{
5876 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
5877}
5878
5879static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5880{
5881 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
5882}
5883
5884static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5885{
5886 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
5887}
5888
5889static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5890{
5891 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
5892}
5893
5894static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5895{
5896 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
5897}
5898
5899static unsigned long emulator_get_cached_segment_base(
5900 struct x86_emulate_ctxt *ctxt, int seg)
5901{
5902 return get_segment_base(emul_to_vcpu(ctxt), seg);
5903}
5904
5905static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
5906 struct desc_struct *desc, u32 *base3,
5907 int seg)
5908{
5909 struct kvm_segment var;
5910
5911 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
5912 *selector = var.selector;
5913
5914 if (var.unusable) {
5915 memset(desc, 0, sizeof(*desc));
5916 if (base3)
5917 *base3 = 0;
5918 return false;
5919 }
5920
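 /* With the granularity bit set, the limit is in units of 4K pages. */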
5921 if (var.g)
5922 var.limit >>= 12;
5923 set_desc_limit(desc, var.limit);
5924 set_desc_base(desc, (unsigned long)var.base);
5925#ifdef CONFIG_X86_64
5926 if (base3)
5927 *base3 = var.base >> 32;
5928#endif
5929 desc->type = var.type;
5930 desc->s = var.s;
5931 desc->dpl = var.dpl;
5932 desc->p = var.present;
5933 desc->avl = var.avl;
5934 desc->l = var.l;
5935 desc->d = var.db;
5936 desc->g = var.g;
5937
5938 return true;
5939}
5940
5941static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
5942 struct desc_struct *desc, u32 base3,
5943 int seg)
5944{
5945 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5946 struct kvm_segment var;
5947
5948 var.selector = selector;
5949 var.base = get_desc_base(desc);
5950#ifdef CONFIG_X86_64
5951 var.base |= ((u64)base3) << 32;
5952#endif
5953 var.limit = get_desc_limit(desc);
5954 if (desc->g)
5955 var.limit = (var.limit << 12) | 0xfff;
5956 var.type = desc->type;
5957 var.dpl = desc->dpl;
5958 var.db = desc->d;
5959 var.s = desc->s;
5960 var.l = desc->l;
5961 var.g = desc->g;
5962 var.avl = desc->avl;
5963 var.present = desc->p;
5964 var.unusable = !var.present;
5965 var.padding = 0;
5966
5967 kvm_set_segment(vcpu, &var, seg);
5968 return;
5969}
5970
5971static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
5972 u32 msr_index, u64 *pdata)
5973{
5974 struct msr_data msr;
5975 int r;
5976
5977 msr.index = msr_index;
5978 msr.host_initiated = false;
5979 r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
5980 if (r)
5981 return r;
5982
5983 *pdata = msr.data;
5984 return 0;
5985}
5986
5987static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
5988 u32 msr_index, u64 data)
5989{
5990 struct msr_data msr;
5991
5992 msr.data = data;
5993 msr.index = msr_index;
5994 msr.host_initiated = false;
5995 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
5996}
5997
5998static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
5999{
6000 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6001
6002 return vcpu->arch.smbase;
6003}
6004
6005static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
6006{
6007 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6008
6009 vcpu->arch.smbase = smbase;
6010}
6011
6012static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
6013 u32 pmc)
6014{
6015 return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
6016}
6017
6018static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
6019 u32 pmc, u64 *pdata)
6020{
6021 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
6022}
6023
6024static void emulator_halt(struct x86_emulate_ctxt *ctxt)
6025{
6026 emul_to_vcpu(ctxt)->arch.halt_request = 1;
6027}
6028
6029static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
6030 struct x86_instruction_info *info,
6031 enum x86_intercept_stage stage)
6032{
6033 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
6034}
6035
6036static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
6037 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
6038{
6039 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
6040}
6041
6042static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
6043{
6044 return kvm_register_read(emul_to_vcpu(ctxt), reg);
6045}
6046
6047static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
6048{
6049 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
6050}
6051
6052static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
6053{
6054 kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
6055}
6056
6057static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
6058{
6059 return emul_to_vcpu(ctxt)->arch.hflags;
6060}
6061
6062static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
6063{
6064 emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
6065}
6066
6067static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
6068 const char *smstate)
6069{
6070 return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
6071}
6072
6073static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
6074{
6075 kvm_smm_changed(emul_to_vcpu(ctxt));
6076}
6077
6078static const struct x86_emulate_ops emulate_ops = {
6079 .read_gpr = emulator_read_gpr,
6080 .write_gpr = emulator_write_gpr,
6081 .read_std = emulator_read_std,
6082 .write_std = emulator_write_std,
6083 .read_phys = kvm_read_guest_phys_system,
6084 .fetch = kvm_fetch_guest_virt,
6085 .read_emulated = emulator_read_emulated,
6086 .write_emulated = emulator_write_emulated,
6087 .cmpxchg_emulated = emulator_cmpxchg_emulated,
6088 .invlpg = emulator_invlpg,
6089 .pio_in_emulated = emulator_pio_in_emulated,
6090 .pio_out_emulated = emulator_pio_out_emulated,
6091 .get_segment = emulator_get_segment,
6092 .set_segment = emulator_set_segment,
6093 .get_cached_segment_base = emulator_get_cached_segment_base,
6094 .get_gdt = emulator_get_gdt,
6095 .get_idt = emulator_get_idt,
6096 .set_gdt = emulator_set_gdt,
6097 .set_idt = emulator_set_idt,
6098 .get_cr = emulator_get_cr,
6099 .set_cr = emulator_set_cr,
6100 .cpl = emulator_get_cpl,
6101 .get_dr = emulator_get_dr,
6102 .set_dr = emulator_set_dr,
6103 .get_smbase = emulator_get_smbase,
6104 .set_smbase = emulator_set_smbase,
6105 .set_msr = emulator_set_msr,
6106 .get_msr = emulator_get_msr,
6107 .check_pmc = emulator_check_pmc,
6108 .read_pmc = emulator_read_pmc,
6109 .halt = emulator_halt,
6110 .wbinvd = emulator_wbinvd,
6111 .fix_hypercall = emulator_fix_hypercall,
6112 .intercept = emulator_intercept,
6113 .get_cpuid = emulator_get_cpuid,
6114 .set_nmi_mask = emulator_set_nmi_mask,
6115 .get_hflags = emulator_get_hflags,
6116 .set_hflags = emulator_set_hflags,
6117 .pre_leave_smm = emulator_pre_leave_smm,
6118 .post_leave_smm = emulator_post_leave_smm,
6119};
6120
6121static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
6122{
6123 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
6124
6125 /*
6126 * An sti; sti sequence only disables interrupts for the first
6127 * instruction, so if the previous instruction already left the
6128 * vCPU in an interrupt shadow, do not set the shadow again;
6129 * clearing it instead may let a pending event be injected.
6130 */
6131 if (int_shadow & mask)
6132 mask = 0;
6133 if (unlikely(int_shadow || mask)) {
6134 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
6135 if (!mask)
6136 kvm_make_request(KVM_REQ_EVENT, vcpu);
6137 }
6138}
6139
6140static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
6141{
6142 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6143 if (ctxt->exception.vector == PF_VECTOR)
6144 return kvm_propagate_fault(vcpu, &ctxt->exception);
6145
6146 if (ctxt->exception.error_code_valid)
6147 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
6148 ctxt->exception.error_code);
6149 else
6150 kvm_queue_exception(vcpu, ctxt->exception.vector);
6151 return false;
6152}
6153
6154static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
6155{
6156 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6157 int cs_db, cs_l;
6158
6159 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
6160
6161 ctxt->eflags = kvm_get_rflags(vcpu);
6162 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
6163
6164 ctxt->eip = kvm_rip_read(vcpu);
6165 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
6166 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
6167 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
6168 cs_db ? X86EMUL_MODE_PROT32 :
6169 X86EMUL_MODE_PROT16;
6170 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
6171 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
6172 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
6173
6174 init_decode_cache(ctxt);
6175 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6176}
6177
6178int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
6179{
6180 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6181 int ret;
6182
6183 init_emulate_ctxt(vcpu);
6184
6185 ctxt->op_bytes = 2;
6186 ctxt->ad_bytes = 2;
6187 ctxt->_eip = ctxt->eip + inc_eip;
6188 ret = emulate_int_real(ctxt, irq);
6189
6190 if (ret != X86EMUL_CONTINUE)
6191 return EMULATE_FAIL;
6192
6193 ctxt->eip = ctxt->_eip;
6194 kvm_rip_write(vcpu, ctxt->eip);
6195 kvm_set_rflags(vcpu, ctxt->eflags);
6196
6197 return EMULATE_DONE;
6198}
6199EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
6200
6201static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
6202{
6203 int r = EMULATE_DONE;
6204
6205 ++vcpu->stat.insn_emulation_fail;
6206 trace_kvm_emulate_insn_failed(vcpu);
6207
6208 if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
6209 return EMULATE_FAIL;
6210
6211 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
6212 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6213 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6214 vcpu->run->internal.ndata = 0;
6215 r = EMULATE_USER_EXIT;
6216 }
6217
6218 kvm_queue_exception(vcpu, UD_VECTOR);
6219
6220 return r;
6221}
6222
6223static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
6224 bool write_fault_to_shadow_pgtable,
6225 int emulation_type)
6226{
6227 gpa_t gpa = cr2;
6228 kvm_pfn_t pfn;
6229
6230 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
6231 return false;
6232
6233 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
6234 return false;
6235
6236 if (!vcpu->arch.mmu->direct_map) {
6237 /*
6238 * Write permission should be allowed since only
6239 * write access needs to be emulated.
6240 */
6241 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
6242
6243 /*
6244 * If the mapping is invalid in the guest, let the cpu retry
6245 * it to generate the fault.
6246 */
6247 if (gpa == UNMAPPED_GVA)
6248 return true;
6249 }
6250
6251 /*
6252 * Do not retry the unhandleable instruction if it faults on the
6253 * readonly host memory, otherwise it will go into an infinite loop:
6254 * retry instruction -> write #PF -> emulation fail -> retry
6255 * instruction -> ...
6256 */
6257 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
6258
6259 /*
6260 * If the instruction failed on the error pfn, it can not be fixed,
6261 * report the error to userspace.
6262 */
6263 if (is_error_noslot_pfn(pfn))
6264 return false;
6265
6266 kvm_release_pfn_clean(pfn);
6267
6268 /* The instructions are well-emulated on direct mmu. */
6269 if (vcpu->arch.mmu->direct_map) {
6270 unsigned int indirect_shadow_pages;
6271
6272 spin_lock(&vcpu->kvm->mmu_lock);
6273 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
6274 spin_unlock(&vcpu->kvm->mmu_lock);
6275
6276 if (indirect_shadow_pages)
6277 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6278
6279 return true;
6280 }
6281
6282 /*
6283 * If emulation was due to access to a shadowed page table and it
6284 * failed, try to unshadow the page and re-enter the guest to let
6285 * the CPU execute the instruction.
6286 */
6287 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6288
6289 /*
6290 * If the access faults on its page table, it can not
6291 * be fixed by unprotecting the shadow page and it should
6292 * be reported to userspace.
6293 */
6294 return !write_fault_to_shadow_pgtable;
6295}
6296
6297static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
6298 unsigned long cr2, int emulation_type)
6299{
6300 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6301 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
6302
6303 last_retry_eip = vcpu->arch.last_retry_eip;
6304 last_retry_addr = vcpu->arch.last_retry_addr;
6305
6306 /*
6307 * If the emulation was caused by a write #PF from an instruction
6308 * that does not itself write page tables, then the VM-EXIT was
6309 * caused by shadow page protection: we can zap the shadow page
6310 * and retry the instruction directly.
6311 *
6312 * Note: if the guest uses a non-page-table-modifying instruction
6313 * on a PDE that points to the instruction itself, we would unmap
6314 * the instruction and go into an infinite loop. So we cache the
6315 * last retried eip and the last faulting address; if we meet the
6316 * same eip and address again, we can break out of the potential
6317 * infinite loop.
6318 */
6319 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
6320
6321 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
6322 return false;
6323
6324 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
6325 return false;
6326
6327 if (x86_page_table_writing_insn(ctxt))
6328 return false;
6329
6330 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
6331 return false;
6332
6333 vcpu->arch.last_retry_eip = ctxt->eip;
6334 vcpu->arch.last_retry_addr = cr2;
6335
6336 if (!vcpu->arch.mmu->direct_map)
6337 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
6338
6339 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6340
6341 return true;
6342}
6343
6344static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
6345static int complete_emulated_pio(struct kvm_vcpu *vcpu);
6346
6347static void kvm_smm_changed(struct kvm_vcpu *vcpu)
6348{
6349 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
6350 /* This is a good place to trace that we are exiting SMM. */
6351 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
6352
6353 /* Process a latched INIT or SMI, if any. */
6354 kvm_make_request(KVM_REQ_EVENT, vcpu);
6355 }
6356
6357 kvm_mmu_reset_context(vcpu);
6358}
6359
6360static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
6361 unsigned long *db)
6362{
6363 u32 dr6 = 0;
6364 int i;
6365 u32 enable, rwlen;
6366
6367 enable = dr7;
6368 rwlen = dr7 >> 16;
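 /* DR7 holds two enable bits and a 4-bit R/W+LEN field per breakpoint. */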
6369 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
6370 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
6371 dr6 |= (1 << i);
6372 return dr6;
6373}
6374
6375static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
6376{
6377 struct kvm_run *kvm_run = vcpu->run;
6378
6379 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
6380 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
6381 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
6382 kvm_run->debug.arch.exception = DB_VECTOR;
6383 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6384 *r = EMULATE_USER_EXIT;
6385 } else {
6386 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
6387 }
6388}
6389
6390int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
6391{
6392 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6393 int r = EMULATE_DONE;
6394
6395 kvm_x86_ops->skip_emulated_instruction(vcpu);
6396
6397 /*
6398 * rflags is the old, "raw" value of the flags. The new value has
6399 * not been saved yet.
6400 *
6401 * This is correct even for TF set by the guest, because "the
6402 * processor will not generate this exception after the instruction
6403 * that sets the TF flag".
6404 */
6405 if (unlikely(rflags & X86_EFLAGS_TF))
6406 kvm_vcpu_do_singlestep(vcpu, &r);
6407 return r == EMULATE_DONE;
6408}
6409EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
6410
6411static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
6412{
6413 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
6414 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
6415 struct kvm_run *kvm_run = vcpu->run;
6416 unsigned long eip = kvm_get_linear_rip(vcpu);
6417 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6418 vcpu->arch.guest_debug_dr7,
6419 vcpu->arch.eff_db);
6420
6421 if (dr6 != 0) {
6422 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
6423 kvm_run->debug.arch.pc = eip;
6424 kvm_run->debug.arch.exception = DB_VECTOR;
6425 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6426 *r = EMULATE_USER_EXIT;
6427 return true;
6428 }
6429 }
6430
6431 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
6432 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
6433 unsigned long eip = kvm_get_linear_rip(vcpu);
6434 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6435 vcpu->arch.dr7,
6436 vcpu->arch.db);
6437
6438 if (dr6 != 0) {
6439 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
6440 vcpu->arch.dr6 |= dr6 | DR6_RTM;
6441 kvm_queue_exception(vcpu, DB_VECTOR);
6442 *r = EMULATE_DONE;
6443 return true;
6444 }
6445 }
6446
6447 return false;
6448}
6449
6450static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
6451{
6452 switch (ctxt->opcode_len) {
6453 case 1:
6454 switch (ctxt->b) {
6455 case 0xe4:
6456 case 0xe5:
6457 case 0xec:
6458 case 0xed:
6459 case 0xe6:
6460 case 0xe7:
6461 case 0xee:
6462 case 0xef:
6463 case 0x6c:
6464 case 0x6d:
6465 case 0x6e:
6466 case 0x6f:
6467 return true;
6468 }
6469 break;
6470 case 2:
6471 switch (ctxt->b) {
6472 case 0x33:
6473 return true;
6474 }
6475 break;
6476 }
6477
6478 return false;
6479}
6480
6481int x86_emulate_instruction(struct kvm_vcpu *vcpu,
6482 unsigned long cr2,
6483 int emulation_type,
6484 void *insn,
6485 int insn_len)
6486{
6487 int r;
6488 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6489 bool writeback = true;
6490 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
6491
6492 vcpu->arch.l1tf_flush_l1d = true;
6493
6494 /*
6495 * Clear write_fault_to_shadow_pgtable here to ensure it is
6496 * never reused.
6497 */
6498 vcpu->arch.write_fault_to_shadow_pgtable = false;
6499 kvm_clear_exception_queue(vcpu);
6500
6501 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
6502 init_emulate_ctxt(vcpu);
6503
6504 /*
6505 * We will reenter on the same instruction since
6506 * we do not set complete_userspace_io. This does not
6507 * handle watchpoints yet, those would be handled in
6508 * the emulate_ops.
6509 */
6510 if (!(emulation_type & EMULTYPE_SKIP) &&
6511 kvm_vcpu_check_breakpoint(vcpu, &r))
6512 return r;
6513
6514 ctxt->interruptibility = 0;
6515 ctxt->have_exception = false;
6516 ctxt->exception.vector = -1;
6517 ctxt->perm_ok = false;
6518
6519 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
6520
6521 r = x86_decode_insn(ctxt, insn, insn_len);
6522
6523 trace_kvm_emulate_insn_start(vcpu);
6524 ++vcpu->stat.insn_emulation;
6525 if (r != EMULATION_OK) {
6526 if (emulation_type & EMULTYPE_TRAP_UD)
6527 return EMULATE_FAIL;
6528 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
6529 emulation_type))
6530 return EMULATE_DONE;
6531 if (ctxt->have_exception && inject_emulated_exception(vcpu))
6532 return EMULATE_DONE;
6533 if (emulation_type & EMULTYPE_SKIP)
6534 return EMULATE_FAIL;
6535 return handle_emulation_failure(vcpu, emulation_type);
6536 }
6537 }
6538
6539 if ((emulation_type & EMULTYPE_VMWARE) &&
6540 !is_vmware_backdoor_opcode(ctxt))
6541 return EMULATE_FAIL;
6542
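 /*
  * EMULTYPE_SKIP only needed the decode to find the instruction
  * length: advance RIP past it and clear RF without executing.
  */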
6543 if (emulation_type & EMULTYPE_SKIP) {
6544 kvm_rip_write(vcpu, ctxt->_eip);
6545 if (ctxt->eflags & X86_EFLAGS_RF)
6546 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
6547 return EMULATE_DONE;
6548 }
6549
6550 if (retry_instruction(ctxt, cr2, emulation_type))
6551 return EMULATE_DONE;
6552
6553 /* this is needed for the vmware backdoor interface to work since it
6554 changes register values during IO operation */
6555 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
6556 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6557 emulator_invalidate_register_cache(ctxt);
6558 }
6559
6560restart:
6561 /* Save the faulting GPA (cr2) in the address field */
6562 ctxt->exception.address = cr2;
6563
6564 r = x86_emulate_insn(ctxt);
6565
6566 if (r == EMULATION_INTERCEPTED)
6567 return EMULATE_DONE;
6568
6569 if (r == EMULATION_FAILED) {
6570 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
6571 emulation_type))
6572 return EMULATE_DONE;
6573
6574 return handle_emulation_failure(vcpu, emulation_type);
6575 }
6576
6577 if (ctxt->have_exception) {
6578 r = EMULATE_DONE;
6579 if (inject_emulated_exception(vcpu))
6580 return r;
6581 } else if (vcpu->arch.pio.count) {
6582 if (!vcpu->arch.pio.in) {
6583 /* FIXME: return into emulator if single-stepping. */
6584 vcpu->arch.pio.count = 0;
6585 } else {
6586 writeback = false;
6587 vcpu->arch.complete_userspace_io = complete_emulated_pio;
6588 }
6589 r = EMULATE_USER_EXIT;
6590 } else if (vcpu->mmio_needed) {
6591 if (!vcpu->mmio_is_write)
6592 writeback = false;
6593 r = EMULATE_USER_EXIT;
6594 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6595 } else if (r == EMULATION_RESTART)
6596 goto restart;
6597 else
6598 r = EMULATE_DONE;
6599
6600 if (writeback) {
6601 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6602 toggle_interruptibility(vcpu, ctxt->interruptibility);
6603 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6604 if (!ctxt->have_exception ||
6605 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
6606 kvm_rip_write(vcpu, ctxt->eip);
6607 if (r == EMULATE_DONE && ctxt->tf)
6608 kvm_vcpu_do_singlestep(vcpu, &r);
6609 __kvm_set_rflags(vcpu, ctxt->eflags);
6610 }
6611
6612 /*
6613 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
6614 * do nothing, and it will be requested again as soon as
6615 * the shadow expires. But we still need to check here,
6616 * because POPF has no interrupt shadow.
6617 */
6618 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
6619 kvm_make_request(KVM_REQ_EVENT, vcpu);
6620 } else
6621 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
6622
6623 return r;
6624}
6625
6626int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
6627{
6628 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
6629}
6630EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
6631
6632int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
6633 void *insn, int insn_len)
6634{
6635 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
6636}
6637EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
6638
6639static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
6640{
6641 vcpu->arch.pio.count = 0;
6642 return 1;
6643}
6644
6645static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
6646{
6647 vcpu->arch.pio.count = 0;
6648
6649 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
6650 return 1;
6651
6652 return kvm_skip_emulated_instruction(vcpu);
6653}
6654
6655static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
6656 unsigned short port)
6657{
6658 unsigned long val = kvm_rax_read(vcpu);
6659 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
6660 size, port, &val, 1);
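 /* A non-zero return means the write completed entirely in the kernel. */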
6661 if (ret)
6662 return ret;
6663
6664 /*
6665 * Workaround userspace that relies on old KVM behavior of %rip being
6666 * incremented prior to exiting to userspace to handle "OUT 0x7e".
6667 */
6668 if (port == 0x7e &&
6669 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
6670 vcpu->arch.complete_userspace_io =
6671 complete_fast_pio_out_port_0x7e;
6672 kvm_skip_emulated_instruction(vcpu);
6673 } else {
6674 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6675 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
6676 }
6677 return 0;
6678}
6679
6680static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
6681{
6682 unsigned long val;
6683
6684 /* We should only ever be called with arch.pio.count equal to 1 */
6685 BUG_ON(vcpu->arch.pio.count != 1);
6686
6687 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
6688 vcpu->arch.pio.count = 0;
6689 return 1;
6690 }
6691
	/* For size less than 4 we merge, else we zero extend */
6693 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
6694
	/*
	 * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
	 * the copy and tracing
	 */
6699 emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
6700 vcpu->arch.pio.port, &val, 1);
6701 kvm_rax_write(vcpu, val);
6702
6703 return kvm_skip_emulated_instruction(vcpu);
6704}
6705
6706static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
6707 unsigned short port)
6708{
6709 unsigned long val;
6710 int ret;
6711
	/* For size less than 4 we merge, else we zero extend */
6713 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
6714
6715 ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
6716 &val, 1);
6717 if (ret) {
6718 kvm_rax_write(vcpu, val);
6719 return ret;
6720 }
6721
6722 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6723 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
6724
6725 return 0;
6726}
6727
6728int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
6729{
6730 int ret;
6731
6732 if (in)
6733 ret = kvm_fast_pio_in(vcpu, size, port);
6734 else
6735 ret = kvm_fast_pio_out(vcpu, size, port);
6736 return ret && kvm_skip_emulated_instruction(vcpu);
6737}
6738EXPORT_SYMBOL_GPL(kvm_fast_pio);
6739
6740static int kvmclock_cpu_down_prep(unsigned int cpu)
6741{
6742 __this_cpu_write(cpu_tsc_khz, 0);
6743 return 0;
6744}
6745
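/*
 * Refresh this CPU's cached TSC frequency: from the cpufreq transition
 * data when called with one, otherwise from cpufreq (or tsc_khz as the
 * fallback) directly.
 */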
6746static void tsc_khz_changed(void *data)
6747{
6748 struct cpufreq_freqs *freq = data;
6749 unsigned long khz = 0;
6750
6751 if (data)
6752 khz = freq->new;
6753 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
6754 khz = cpufreq_quick_get(raw_smp_processor_id());
6755 if (!khz)
6756 khz = tsc_khz;
6757 __this_cpu_write(cpu_tsc_khz, khz);
6758}
6759
6760#ifdef CONFIG_X86_64
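/*
 * Hyper-V reenlightenment callback: the host's TSC frequency changed
 * (e.g. after a live migration), so stop the hypervisor's TSC emulation
 * and recompute the masterclock of every VM against the new frequency.
 */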
6761static void kvm_hyperv_tsc_notifier(void)
6762{
6763 struct kvm *kvm;
6764 struct kvm_vcpu *vcpu;
6765 int cpu;
6766
6767 mutex_lock(&kvm_lock);
6768 list_for_each_entry(kvm, &vm_list, vm_list)
6769 kvm_make_mclock_inprogress_request(kvm);
6770
6771 hyperv_stop_tsc_emulation();
6772
	/* TSC frequency always matches when on Hyper-V */
6774 for_each_present_cpu(cpu)
6775 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
6776 kvm_max_guest_tsc_khz = tsc_khz;
6777
6778 list_for_each_entry(kvm, &vm_list, vm_list) {
6779 struct kvm_arch *ka = &kvm->arch;
6780
6781 spin_lock(&ka->pvclock_gtod_sync_lock);
6782
6783 pvclock_update_vm_gtod_copy(kvm);
6784
6785 kvm_for_each_vcpu(cpu, vcpu, kvm)
6786 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6787
6788 kvm_for_each_vcpu(cpu, vcpu, kvm)
6789 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
6790
6791 spin_unlock(&ka->pvclock_gtod_sync_lock);
6792 }
6793 mutex_unlock(&kvm_lock);
6794}
6795#endif
6796
6797static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
6798{
6799 struct kvm *kvm;
6800 struct kvm_vcpu *vcpu;
6801 int i, send_ipi = 0;
6802
	/*
	 * kvmclock derives guest time from the host TSC, so a cpufreq
	 * transition on a CPU without a constant TSC changes the effective
	 * guest TSC rate, and the kvmclock parameters of every vCPU running
	 * on that CPU must be recomputed.
	 *
	 * The update is ordered against the frequency transition so that the
	 * guest never runs with a TSC faster than the rate kvmclock
	 * advertises: time may briefly appear to run slow, but it is never
	 * seen to jump backwards.
	 *
	 *  - A frequency increase is handled at CPUFREQ_PRECHANGE, before
	 *    the CPU actually speeds up.
	 *  - A frequency decrease is handled at CPUFREQ_POSTCHANGE, after
	 *    the CPU has already slowed down.
	 *
	 * kvmclock_cpufreq_notifier() below filters the transitions
	 * accordingly before calling this function for each affected CPU.
	 */
6842 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
6843
6844 mutex_lock(&kvm_lock);
6845 list_for_each_entry(kvm, &vm_list, vm_list) {
6846 kvm_for_each_vcpu(i, vcpu, kvm) {
6847 if (vcpu->cpu != cpu)
6848 continue;
6849 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6850 if (vcpu->cpu != raw_smp_processor_id())
6851 send_ipi = 1;
6852 }
6853 }
6854 mutex_unlock(&kvm_lock);
6855
6856 if (freq->old < freq->new && send_ipi) {
		/*
		 * We upscale the frequency.  Make sure the guest does not
		 * see old kvmclock values while running with the new
		 * frequency, otherwise we risk the guest seeing time go
		 * backwards.
		 *
		 * In case we update the frequency for another cpu (which
		 * might be in guest context) send an interrupt to kick the
		 * cpu out of guest context.  Every time guest context is
		 * entered kvmclock will be updated, so the guest will not
		 * see stale values.
		 */
6869 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
6870 }
6871}
6872
6873static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
6874 void *data)
6875{
6876 struct cpufreq_freqs *freq = data;
6877 int cpu;
6878
6879 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
6880 return 0;
6881 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
6882 return 0;
6883
6884 for_each_cpu(cpu, freq->policy->cpus)
6885 __kvmclock_cpufreq_notifier(freq, cpu);
6886
6887 return 0;
6888}
6889
6890static struct notifier_block kvmclock_cpufreq_notifier_block = {
6891 .notifier_call = kvmclock_cpufreq_notifier
6892};
6893
6894static int kvmclock_cpu_online(unsigned int cpu)
6895{
6896 tsc_khz_changed(NULL);
6897 return 0;
6898}
6899
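/*
 * Record the highest guest TSC frequency we may have to handle and hook
 * cpufreq and CPU hotplug so the per-cpu TSC frequency stays current.
 */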
6900static void kvm_timer_init(void)
6901{
6902 max_tsc_khz = tsc_khz;
6903
6904 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
6905#ifdef CONFIG_CPU_FREQ
6906 struct cpufreq_policy policy;
6907 int cpu;
6908
6909 memset(&policy, 0, sizeof(policy));
6910 cpu = get_cpu();
6911 cpufreq_get_policy(&policy, cpu);
6912 if (policy.cpuinfo.max_freq)
6913 max_tsc_khz = policy.cpuinfo.max_freq;
6914 put_cpu();
6915#endif
6916 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
6917 CPUFREQ_TRANSITION_NOTIFIER);
6918 }
6919
6920 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
6921 kvmclock_cpu_online, kvmclock_cpu_down_prep);
6922}
6923
6924DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
6925EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
6926
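/*
 * Callbacks for the perf subsystem: a PMI that arrives while current_vcpu
 * is set is attributed to the guest, using the guest's RIP and CPL.
 */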
6927int kvm_is_in_guest(void)
6928{
6929 return __this_cpu_read(current_vcpu) != NULL;
6930}
6931
6932static int kvm_is_user_mode(void)
6933{
6934 int user_mode = 3;
6935
6936 if (__this_cpu_read(current_vcpu))
6937 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
6938
6939 return user_mode != 0;
6940}
6941
6942static unsigned long kvm_get_guest_ip(void)
6943{
6944 unsigned long ip = 0;
6945
6946 if (__this_cpu_read(current_vcpu))
6947 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
6948
6949 return ip;
6950}
6951
6952static void kvm_handle_intel_pt_intr(void)
6953{
6954 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
6955
6956 kvm_make_request(KVM_REQ_PMI, vcpu);
6957 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
6958 (unsigned long *)&vcpu->arch.pmu.global_status);
6959}
6960
6961static struct perf_guest_info_callbacks kvm_guest_cbs = {
6962 .is_in_guest = kvm_is_in_guest,
6963 .is_user_mode = kvm_is_user_mode,
6964 .get_guest_ip = kvm_get_guest_ip,
6965 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
6966};
6967
6968#ifdef CONFIG_X86_64
6969static void pvclock_gtod_update_fn(struct work_struct *work)
6970{
6971 struct kvm *kvm;
6972
6973 struct kvm_vcpu *vcpu;
6974 int i;
6975
6976 mutex_lock(&kvm_lock);
6977 list_for_each_entry(kvm, &vm_list, vm_list)
6978 kvm_for_each_vcpu(i, vcpu, kvm)
6979 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
6980 atomic_set(&kvm_guest_has_master_clock, 0);
6981 mutex_unlock(&kvm_lock);
6982}
6983
6984static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
6985
/*
 * Notification about pvclock gtod data update.
 */
6989static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
6990 void *priv)
6991{
6992 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
6993 struct timekeeper *tk = priv;
6994
6995 update_pvclock_gtod(tk);
6996
	/* disable master clock if host does not trust, or does not
	 * use, TSC based clocksource.
	 */
7000 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
7001 atomic_read(&kvm_guest_has_master_clock) != 0)
7002 queue_work(system_long_wq, &pvclock_gtod_work);
7003
7004 return 0;
7005}
7006
7007static struct notifier_block pvclock_gtod_notifier = {
7008 .notifier_call = pvclock_gtod_notify,
7009};
7010#endif
7011
7012int kvm_arch_init(void *opaque)
7013{
7014 int r;
7015 struct kvm_x86_ops *ops = opaque;
7016
7017 if (kvm_x86_ops) {
7018 printk(KERN_ERR "kvm: already loaded the other module\n");
7019 r = -EEXIST;
7020 goto out;
7021 }
7022
7023 if (!ops->cpu_has_kvm_support()) {
7024 printk(KERN_ERR "kvm: no hardware support\n");
7025 r = -EOPNOTSUPP;
7026 goto out;
7027 }
7028 if (ops->disabled_by_bios()) {
7029 printk(KERN_ERR "kvm: disabled by bios\n");
7030 r = -EOPNOTSUPP;
7031 goto out;
7032 }
7033
	/*
	 * KVM assumes the CPU has an FPU and supports FXSAVE/FXRSTOR; for
	 * example, KVM_GET_FPU and KVM_SET_FPU operate directly on the
	 * fxregs_state embedded in guest_fpu.  Refuse to load on hardware
	 * that lacks either feature.
	 */
7039 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
7040 printk(KERN_ERR "kvm: inadequate fpu\n");
7041 r = -EOPNOTSUPP;
7042 goto out;
7043 }
7044
7045 r = -ENOMEM;
7046 x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
7047 __alignof__(struct fpu), SLAB_ACCOUNT,
7048 NULL);
7049 if (!x86_fpu_cache) {
7050 printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
7051 goto out;
7052 }
7053
7054 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
7055 if (!shared_msrs) {
7056 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
7057 goto out_free_x86_fpu_cache;
7058 }
7059
7060 r = kvm_mmu_module_init();
7061 if (r)
7062 goto out_free_percpu;
7063
7064 kvm_x86_ops = ops;
7065
7066 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
7067 PT_DIRTY_MASK, PT64_NX_MASK, 0,
7068 PT_PRESENT_MASK, 0, sme_me_mask);
7069 kvm_timer_init();
7070
7071 perf_register_guest_info_callbacks(&kvm_guest_cbs);
7072
7073 if (boot_cpu_has(X86_FEATURE_XSAVE))
7074 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
7075
7076 kvm_lapic_init();
7077 if (pi_inject_timer == -1)
7078 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
7079#ifdef CONFIG_X86_64
7080 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
7081
7082 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7083 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
7084#endif
7085
7086 return 0;
7087
7088out_free_percpu:
7089 free_percpu(shared_msrs);
7090out_free_x86_fpu_cache:
7091 kmem_cache_destroy(x86_fpu_cache);
7092out:
7093 return r;
7094}
7095
7096void kvm_arch_exit(void)
7097{
7098#ifdef CONFIG_X86_64
7099 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7100 clear_hv_tscchange_cb();
7101#endif
7102 kvm_lapic_exit();
7103 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
7104
7105 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7106 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
7107 CPUFREQ_TRANSITION_NOTIFIER);
7108 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
7109#ifdef CONFIG_X86_64
7110 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
7111#endif
7112 kvm_x86_ops = NULL;
7113 kvm_mmu_module_exit();
7114 free_percpu(shared_msrs);
7115 kmem_cache_destroy(x86_fpu_cache);
7116}
7117
7118int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
7119{
7120 ++vcpu->stat.halt_exits;
7121 if (lapic_in_kernel(vcpu)) {
7122 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
7123 return 1;
7124 } else {
7125 vcpu->run->exit_reason = KVM_EXIT_HLT;
7126 return 0;
7127 }
7128}
7129EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
7130
7131int kvm_emulate_halt(struct kvm_vcpu *vcpu)
7132{
7133 int ret = kvm_skip_emulated_instruction(vcpu);
7134
	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
7138 return kvm_vcpu_halt(vcpu) && ret;
7139}
7140EXPORT_SYMBOL_GPL(kvm_emulate_halt);
7141
7142#ifdef CONFIG_X86_64
7143static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
7144 unsigned long clock_type)
7145{
7146 struct kvm_clock_pairing clock_pairing;
7147 struct timespec64 ts;
7148 u64 cycle;
7149 int ret;
7150
7151 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
7152 return -KVM_EOPNOTSUPP;
7153
	if (!kvm_get_walltime_and_clockread(&ts, &cycle))
7155 return -KVM_EOPNOTSUPP;
7156
7157 clock_pairing.sec = ts.tv_sec;
7158 clock_pairing.nsec = ts.tv_nsec;
7159 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
7160 clock_pairing.flags = 0;
7161 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
7162
7163 ret = 0;
7164 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
7165 sizeof(struct kvm_clock_pairing)))
7166 ret = -KVM_EFAULT;
7167
7168 return ret;
7169}
7170#endif
7171
/*
 * kvm_pv_kick_cpu_op:  Kick a vcpu.
 *
 * @apicid - apicid of vcpu to be kicked.
 */
7177static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
7178{
7179 struct kvm_lapic_irq lapic_irq;
7180
7181 lapic_irq.shorthand = 0;
7182 lapic_irq.dest_mode = 0;
7183 lapic_irq.level = 0;
7184 lapic_irq.dest_id = apicid;
7185 lapic_irq.msi_redir_hint = false;
7186
7187 lapic_irq.delivery_mode = APIC_DM_REMRD;
7188 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
7189}
7190
7191void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
7192{
7193 if (!lapic_in_kernel(vcpu)) {
7194 WARN_ON_ONCE(vcpu->arch.apicv_active);
7195 return;
7196 }
7197 if (!vcpu->arch.apicv_active)
7198 return;
7199
7200 vcpu->arch.apicv_active = false;
7201 kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
7202}
7203
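/*
 * Yield to the vCPU that owns the given destination APIC ID, provided it
 * has marked itself ready to be boosted.
 */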
7204static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
7205{
7206 struct kvm_vcpu *target = NULL;
7207 struct kvm_apic_map *map;
7208
7209 rcu_read_lock();
7210 map = rcu_dereference(kvm->arch.apic_map);
7211
7212 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
7213 target = map->phys_map[dest_id]->vcpu;
7214
7215 rcu_read_unlock();
7216
7217 if (target && READ_ONCE(target->ready))
7218 kvm_vcpu_yield_to(target);
7219}
7220
7221int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
7222{
7223 unsigned long nr, a0, a1, a2, a3, ret;
7224 int op_64_bit;
7225
7226 if (kvm_hv_hypercall_enabled(vcpu->kvm))
7227 return kvm_hv_hypercall(vcpu);
7228
7229 nr = kvm_rax_read(vcpu);
7230 a0 = kvm_rbx_read(vcpu);
7231 a1 = kvm_rcx_read(vcpu);
7232 a2 = kvm_rdx_read(vcpu);
7233 a3 = kvm_rsi_read(vcpu);
7234
7235 trace_kvm_hypercall(nr, a0, a1, a2, a3);
7236
7237 op_64_bit = is_64_bit_mode(vcpu);
7238 if (!op_64_bit) {
7239 nr &= 0xFFFFFFFF;
7240 a0 &= 0xFFFFFFFF;
7241 a1 &= 0xFFFFFFFF;
7242 a2 &= 0xFFFFFFFF;
7243 a3 &= 0xFFFFFFFF;
7244 }
7245
7246 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
7247 ret = -KVM_EPERM;
7248 goto out;
7249 }
7250
7251 switch (nr) {
7252 case KVM_HC_VAPIC_POLL_IRQ:
7253 ret = 0;
7254 break;
7255 case KVM_HC_KICK_CPU:
7256 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
7257 kvm_sched_yield(vcpu->kvm, a1);
7258 ret = 0;
7259 break;
7260#ifdef CONFIG_X86_64
7261 case KVM_HC_CLOCK_PAIRING:
7262 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
7263 break;
7264#endif
7265 case KVM_HC_SEND_IPI:
7266 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
7267 break;
7268 case KVM_HC_SCHED_YIELD:
7269 kvm_sched_yield(vcpu->kvm, a0);
7270 ret = 0;
7271 break;
7272 default:
7273 ret = -KVM_ENOSYS;
7274 break;
7275 }
7276out:
7277 if (!op_64_bit)
7278 ret = (u32)ret;
7279 kvm_rax_write(vcpu, ret);
7280
7281 ++vcpu->stat.hypercalls;
7282 return kvm_skip_emulated_instruction(vcpu);
7283}
7284EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
7285
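/*
 * Patch the guest's hypercall instruction to the VMCALL/VMMCALL variant
 * that matches the host vendor, so the same guest image works on both
 * Intel and AMD.
 */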
7286static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
7287{
7288 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7289 char instruction[3];
7290 unsigned long rip = kvm_rip_read(vcpu);
7291
7292 kvm_x86_ops->patch_hypercall(vcpu, instruction);
7293
7294 return emulator_write_emulated(ctxt, rip, instruction, 3,
7295 &ctxt->exception);
7296}
7297
7298static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
7299{
7300 return vcpu->run->request_interrupt_window &&
7301 likely(!pic_in_kernel(vcpu->kvm));
7302}
7303
7304static void post_kvm_run_save(struct kvm_vcpu *vcpu)
7305{
7306 struct kvm_run *kvm_run = vcpu->run;
7307
7308 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
7309 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
7310 kvm_run->cr8 = kvm_get_cr8(vcpu);
7311 kvm_run->apic_base = kvm_get_apic_base(vcpu);
7312 kvm_run->ready_for_interrupt_injection =
7313 pic_in_kernel(vcpu->kvm) ||
7314 kvm_vcpu_ready_for_interrupt_injection(vcpu);
7315}
7316
7317static void update_cr8_intercept(struct kvm_vcpu *vcpu)
7318{
7319 int max_irr, tpr;
7320
7321 if (!kvm_x86_ops->update_cr8_intercept)
7322 return;
7323
7324 if (!lapic_in_kernel(vcpu))
7325 return;
7326
7327 if (vcpu->arch.apicv_active)
7328 return;
7329
7330 if (!vcpu->arch.apic->vapic_addr)
7331 max_irr = kvm_lapic_find_highest_irr(vcpu);
7332 else
7333 max_irr = -1;
7334
7335 if (max_irr != -1)
7336 max_irr >>= 4;
7337
7338 tpr = kvm_lapic_get_cr8(vcpu);
7339
7340 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
7341}
7342
7343static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
7344{
7345 int r;
7346
	/* try to reinject previous events if any */
7349 if (vcpu->arch.exception.injected)
7350 kvm_x86_ops->queue_exception(vcpu);
	/*
	 * Do not inject an NMI or interrupt if there is a pending
	 * exception.  Exceptions and interrupts are recognized at
	 * instruction boundaries, i.e. the start of an instruction.
	 * Trap-like exceptions, e.g. #DB, have higher priority than
	 * NMIs and interrupts, i.e. traps are recognized before an
	 * NMI/interrupt that's pending on the same instruction.
	 * Fault-like exceptions, e.g. #GP and #PF, are the lowest
	 * priority: a pending NMI or interrupt is injected before the
	 * faulting instruction is restarted and can raise its
	 * exception again.
	 */
7365 else if (!vcpu->arch.exception.pending) {
7366 if (vcpu->arch.nmi_injected)
7367 kvm_x86_ops->set_nmi(vcpu);
7368 else if (vcpu->arch.interrupt.injected)
7369 kvm_x86_ops->set_irq(vcpu);
7370 }
7371
	/*
	 * Call check_nested_events() even if we reinjected a previous event
	 * in order for caller to determine if it should require immediate-exit
	 * from L2 to L1 due to pending L1 events which require exit
	 * from L2 to L1.
	 */
7378 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7379 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
7380 if (r != 0)
7381 return r;
7382 }
7383
	/* try to inject new event if pending */
7385 if (vcpu->arch.exception.pending) {
7386 trace_kvm_inj_exception(vcpu->arch.exception.nr,
7387 vcpu->arch.exception.has_error_code,
7388 vcpu->arch.exception.error_code);
7389
7390 WARN_ON_ONCE(vcpu->arch.exception.injected);
7391 vcpu->arch.exception.pending = false;
7392 vcpu->arch.exception.injected = true;
7393
7394 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
7395 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
7396 X86_EFLAGS_RF);
7397
7398 if (vcpu->arch.exception.nr == DB_VECTOR) {
			/*
			 * Before injecting the #DB, deliver its payload:
			 * merge the pending DR6 bits into the guest's DR6.
			 * If general detect was armed, hardware clears
			 * DR7.GD when the #DB is delivered, so mirror that
			 * below.
			 */
7409 kvm_deliver_exception_payload(vcpu);
7410 if (vcpu->arch.dr7 & DR7_GD) {
7411 vcpu->arch.dr7 &= ~DR7_GD;
7412 kvm_update_dr7(vcpu);
7413 }
7414 }
7415
7416 kvm_x86_ops->queue_exception(vcpu);
7417 }
7418
	/* Don't consider new event if we re-injected an event */
7420 if (kvm_event_needs_reinjection(vcpu))
7421 return 0;
7422
7423 if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
7424 kvm_x86_ops->smi_allowed(vcpu)) {
7425 vcpu->arch.smi_pending = false;
7426 ++vcpu->arch.smi_count;
7427 enter_smm(vcpu);
7428 } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
7429 --vcpu->arch.nmi_pending;
7430 vcpu->arch.nmi_injected = true;
7431 kvm_x86_ops->set_nmi(vcpu);
7432 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
		/*
		 * Because interrupts can be injected asynchronously, we are
		 * calling check_nested_events again here to avoid a race
		 * condition.  See https://lkml.org/lkml/2014/7/2/60 for the
		 * discussion of this approach and its caveats.
		 */
7440 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7441 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
7442 if (r != 0)
7443 return r;
7444 }
7445 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
7446 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
7447 false);
7448 kvm_x86_ops->set_irq(vcpu);
7449 }
7450 }
7451
7452 return 0;
7453}
7454
7455static void process_nmi(struct kvm_vcpu *vcpu)
7456{
7457 unsigned limit = 2;
7458
	/*
	 * x86 is limited to one NMI running, and one NMI pending after it.
	 * If an NMI is already in progress, limit further NMIs to just one.
	 * Otherwise, allow two (and we'll inject the first one immediately).
	 */
7464 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
7465 limit = 1;
7466
7467 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
7468 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
7469 kvm_make_request(KVM_REQ_EVENT, vcpu);
7470}
7471
7472static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
7473{
7474 u32 flags = 0;
7475 flags |= seg->g << 23;
7476 flags |= seg->db << 22;
7477 flags |= seg->l << 21;
7478 flags |= seg->avl << 20;
7479 flags |= seg->present << 15;
7480 flags |= seg->dpl << 13;
7481 flags |= seg->s << 12;
7482 flags |= seg->type << 8;
7483 return flags;
7484}
7485
7486static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
7487{
7488 struct kvm_segment seg;
7489 int offset;
7490
7491 kvm_get_segment(vcpu, &seg, n);
7492 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
7493
7494 if (n < 3)
7495 offset = 0x7f84 + n * 12;
7496 else
7497 offset = 0x7f2c + (n - 3) * 12;
7498
7499 put_smstate(u32, buf, offset + 8, seg.base);
7500 put_smstate(u32, buf, offset + 4, seg.limit);
7501 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
7502}
7503
7504#ifdef CONFIG_X86_64
7505static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
7506{
7507 struct kvm_segment seg;
7508 int offset;
7509 u16 flags;
7510
7511 kvm_get_segment(vcpu, &seg, n);
7512 offset = 0x7e00 + n * 16;
7513
7514 flags = enter_smm_get_segment_flags(&seg) >> 8;
7515 put_smstate(u16, buf, offset, seg.selector);
7516 put_smstate(u16, buf, offset + 2, flags);
7517 put_smstate(u32, buf, offset + 4, seg.limit);
7518 put_smstate(u64, buf, offset + 8, seg.base);
7519}
7520#endif
7521
7522static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
7523{
7524 struct desc_ptr dt;
7525 struct kvm_segment seg;
7526 unsigned long val;
7527 int i;
7528
7529 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
7530 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
7531 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
7532 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
7533
7534 for (i = 0; i < 8; i++)
7535 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
7536
7537 kvm_get_dr(vcpu, 6, &val);
7538 put_smstate(u32, buf, 0x7fcc, (u32)val);
7539 kvm_get_dr(vcpu, 7, &val);
7540 put_smstate(u32, buf, 0x7fc8, (u32)val);
7541
7542 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7543 put_smstate(u32, buf, 0x7fc4, seg.selector);
7544 put_smstate(u32, buf, 0x7f64, seg.base);
7545 put_smstate(u32, buf, 0x7f60, seg.limit);
7546 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
7547
7548 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7549 put_smstate(u32, buf, 0x7fc0, seg.selector);
7550 put_smstate(u32, buf, 0x7f80, seg.base);
7551 put_smstate(u32, buf, 0x7f7c, seg.limit);
7552 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
7553
7554 kvm_x86_ops->get_gdt(vcpu, &dt);
7555 put_smstate(u32, buf, 0x7f74, dt.address);
7556 put_smstate(u32, buf, 0x7f70, dt.size);
7557
7558 kvm_x86_ops->get_idt(vcpu, &dt);
7559 put_smstate(u32, buf, 0x7f58, dt.address);
7560 put_smstate(u32, buf, 0x7f54, dt.size);
7561
7562 for (i = 0; i < 6; i++)
7563 enter_smm_save_seg_32(vcpu, buf, i);
7564
7565 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
7566
	/* revision id */
7568 put_smstate(u32, buf, 0x7efc, 0x00020000);
7569 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
7570}
7571
7572#ifdef CONFIG_X86_64
7573static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
7574{
7575 struct desc_ptr dt;
7576 struct kvm_segment seg;
7577 unsigned long val;
7578 int i;
7579
7580 for (i = 0; i < 16; i++)
7581 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
7582
7583 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
7584 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
7585
7586 kvm_get_dr(vcpu, 6, &val);
7587 put_smstate(u64, buf, 0x7f68, val);
7588 kvm_get_dr(vcpu, 7, &val);
7589 put_smstate(u64, buf, 0x7f60, val);
7590
7591 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
7592 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
7593 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
7594
7595 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
7596
	/* revision id */
7598 put_smstate(u32, buf, 0x7efc, 0x00020064);
7599
7600 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
7601
7602 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7603 put_smstate(u16, buf, 0x7e90, seg.selector);
7604 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
7605 put_smstate(u32, buf, 0x7e94, seg.limit);
7606 put_smstate(u64, buf, 0x7e98, seg.base);
7607
7608 kvm_x86_ops->get_idt(vcpu, &dt);
7609 put_smstate(u32, buf, 0x7e84, dt.size);
7610 put_smstate(u64, buf, 0x7e88, dt.address);
7611
7612 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7613 put_smstate(u16, buf, 0x7e70, seg.selector);
7614 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
7615 put_smstate(u32, buf, 0x7e74, seg.limit);
7616 put_smstate(u64, buf, 0x7e78, seg.base);
7617
7618 kvm_x86_ops->get_gdt(vcpu, &dt);
7619 put_smstate(u32, buf, 0x7e64, dt.size);
7620 put_smstate(u64, buf, 0x7e68, dt.address);
7621
7622 for (i = 0; i < 6; i++)
7623 enter_smm_save_seg_64(vcpu, buf, i);
7624}
7625#endif
7626
7627static void enter_smm(struct kvm_vcpu *vcpu)
7628{
7629 struct kvm_segment cs, ds;
7630 struct desc_ptr dt;
7631 char buf[512];
7632 u32 cr0;
7633
7634 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
7635 memset(buf, 0, 512);
7636#ifdef CONFIG_X86_64
7637 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7638 enter_smm_save_state_64(vcpu, buf);
7639 else
7640#endif
7641 enter_smm_save_state_32(vcpu, buf);
7642

	/*
	 * Give pre_enter_smm() a chance to make ISA-specific changes to the
	 * vCPU state (e.g. leave guest mode) after we've saved the state into
	 * the SMM state-save area.
	 */
7648 kvm_x86_ops->pre_enter_smm(vcpu, buf);
7649
7650 vcpu->arch.hflags |= HF_SMM_MASK;
7651 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
7652
7653 if (kvm_x86_ops->get_nmi_mask(vcpu))
7654 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
7655 else
7656 kvm_x86_ops->set_nmi_mask(vcpu, true);
7657
7658 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
7659 kvm_rip_write(vcpu, 0x8000);
7660
7661 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
7662 kvm_x86_ops->set_cr0(vcpu, cr0);
7663 vcpu->arch.cr0 = cr0;
7664
7665 kvm_x86_ops->set_cr4(vcpu, 0);
7666
	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
7668 dt.address = dt.size = 0;
7669 kvm_x86_ops->set_idt(vcpu, &dt);
7670
7671 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
7672
7673 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
7674 cs.base = vcpu->arch.smbase;
7675
7676 ds.selector = 0;
7677 ds.base = 0;
7678
7679 cs.limit = ds.limit = 0xffffffff;
7680 cs.type = ds.type = 0x3;
7681 cs.dpl = ds.dpl = 0;
7682 cs.db = ds.db = 0;
7683 cs.s = ds.s = 1;
7684 cs.l = ds.l = 0;
7685 cs.g = ds.g = 1;
7686 cs.avl = ds.avl = 0;
7687 cs.present = ds.present = 1;
7688 cs.unusable = ds.unusable = 0;
7689 cs.padding = ds.padding = 0;
7690
7691 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7692 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
7693 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
7694 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
7695 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
7696 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
7697
7698#ifdef CONFIG_X86_64
7699 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7700 kvm_x86_ops->set_efer(vcpu, 0);
7701#endif
7702
7703 kvm_update_cpuid(vcpu);
7704 kvm_mmu_reset_context(vcpu);
7705}
7706
7707static void process_smi(struct kvm_vcpu *vcpu)
7708{
7709 vcpu->arch.smi_pending = true;
7710 kvm_make_request(KVM_REQ_EVENT, vcpu);
7711}
7712
7713void kvm_make_scan_ioapic_request(struct kvm *kvm)
7714{
7715 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
7716}
7717
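/*
 * Recompute the set of vectors that need an EOI exit: routes scanned from
 * the userspace (split) irqchip or from the in-kernel IOAPIC.
 */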
7718static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
7719{
7720 if (!kvm_apic_present(vcpu))
7721 return;
7722
7723 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
7724
7725 if (irqchip_split(vcpu->kvm))
7726 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
7727 else {
7728 if (vcpu->arch.apicv_active)
7729 kvm_x86_ops->sync_pir_to_irr(vcpu);
7730 if (ioapic_in_kernel(vcpu->kvm))
7731 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
7732 }
7733
7734 if (is_guest_mode(vcpu))
7735 vcpu->arch.load_eoi_exitmap_pending = true;
7736 else
7737 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
7738}
7739
7740static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
7741{
7742 u64 eoi_exit_bitmap[4];
7743
7744 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
7745 return;
7746
7747 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
7748 vcpu_to_synic(vcpu)->vec_bitmap, 256);
7749 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
7750}
7751
7752int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
7753 unsigned long start, unsigned long end,
7754 bool blockable)
7755{
7756 unsigned long apic_address;
7757
	/*
	 * The physical address of apic access page is stored in the VMCS.
	 * Update it when it becomes invalid.
	 */
7762 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7763 if (start <= apic_address && apic_address < end)
7764 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
7765
7766 return 0;
7767}
7768
7769void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
7770{
7771 struct page *page = NULL;
7772
7773 if (!lapic_in_kernel(vcpu))
7774 return;
7775
7776 if (!kvm_x86_ops->set_apic_access_page_addr)
7777 return;
7778
7779 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7780 if (is_error_page(page))
7781 return;
7782 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
7783
	/*
	 * Do not pin apic access page in memory, the MMU notifier
	 * will call us again if it is migrated or swapped out.
	 */
7788 put_page(page);
7789}
7790EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
7791
7792void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
7793{
7794 smp_send_reschedule(vcpu->cpu);
7795}
7796EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
7797
7798
/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to the userspace.  Otherwise, the value will be returned to the
 * userspace.
 */
7803static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
7804{
7805 int r;
7806 bool req_int_win =
7807 dm_request_for_irq_injection(vcpu) &&
7808 kvm_cpu_accept_dm_intr(vcpu);
7809
7810 bool req_immediate_exit = false;
7811
7812 if (kvm_request_pending(vcpu)) {
7813 if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
7814 kvm_x86_ops->get_vmcs12_pages(vcpu);
7815 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
7816 kvm_mmu_unload(vcpu);
7817 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
7818 __kvm_migrate_timers(vcpu);
7819 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
7820 kvm_gen_update_masterclock(vcpu->kvm);
7821 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
7822 kvm_gen_kvmclock_update(vcpu);
7823 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
7824 r = kvm_guest_time_update(vcpu);
7825 if (unlikely(r))
7826 goto out;
7827 }
7828 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
7829 kvm_mmu_sync_roots(vcpu);
7830 if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
7831 kvm_mmu_load_cr3(vcpu);
7832 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
7833 kvm_vcpu_flush_tlb(vcpu, true);
7834 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
7835 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
7836 r = 0;
7837 goto out;
7838 }
7839 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
7840 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
7841 vcpu->mmio_needed = 0;
7842 r = 0;
7843 goto out;
7844 }
7845 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* Page is swapped out. Do synthetic halt */
7847 vcpu->arch.apf.halted = true;
7848 r = 1;
7849 goto out;
7850 }
7851 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
7852 record_steal_time(vcpu);
7853 if (kvm_check_request(KVM_REQ_SMI, vcpu))
7854 process_smi(vcpu);
7855 if (kvm_check_request(KVM_REQ_NMI, vcpu))
7856 process_nmi(vcpu);
7857 if (kvm_check_request(KVM_REQ_PMU, vcpu))
7858 kvm_pmu_handle_event(vcpu);
7859 if (kvm_check_request(KVM_REQ_PMI, vcpu))
7860 kvm_pmu_deliver_pmi(vcpu);
7861 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
7862 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
7863 if (test_bit(vcpu->arch.pending_ioapic_eoi,
7864 vcpu->arch.ioapic_handled_vectors)) {
7865 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
7866 vcpu->run->eoi.vector =
7867 vcpu->arch.pending_ioapic_eoi;
7868 r = 0;
7869 goto out;
7870 }
7871 }
7872 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
7873 vcpu_scan_ioapic(vcpu);
7874 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
7875 vcpu_load_eoi_exitmap(vcpu);
7876 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
7877 kvm_vcpu_reload_apic_access_page(vcpu);
7878 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
7879 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
7880 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
7881 r = 0;
7882 goto out;
7883 }
7884 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
7885 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
7886 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
7887 r = 0;
7888 goto out;
7889 }
7890 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
7891 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
7892 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
7893 r = 0;
7894 goto out;
7895 }
7896
		/*
		 * KVM_REQ_HV_STIMER has to be processed after
		 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
		 * depend on the guest clock being up-to-date
		 */
7902 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
7903 kvm_hv_process_stimers(vcpu);
7904 }
7905
7906 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
7907 ++vcpu->stat.req_event;
7908 kvm_apic_accept_events(vcpu);
7909 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
7910 r = 1;
7911 goto out;
7912 }
7913
7914 if (inject_pending_event(vcpu, req_int_win) != 0)
7915 req_immediate_exit = true;
7916 else {
			/*
			 * Enable SMI/NMI/IRQ window open exits if needed.
			 *
			 * SMIs have three cases:
			 * 1) They can be nested, and then there is nothing to
			 *    do here because RSM will cause a vmexit anyway.
			 * 2) There is an ISA-specific reason why SMI cannot be
			 *    injected, and the moment when this changes can be
			 *    intercepted.
			 * 3) Or the SMI can be pending because
			 *    inject_pending_event has completed the injection
			 *    of an IRQ or NMI from the previous vmexit, and
			 *    then we request an immediate exit to inject the
			 *    SMI.
			 */
7931 if (vcpu->arch.smi_pending && !is_smm(vcpu))
7932 if (!kvm_x86_ops->enable_smi_window(vcpu))
7933 req_immediate_exit = true;
7934 if (vcpu->arch.nmi_pending)
7935 kvm_x86_ops->enable_nmi_window(vcpu);
7936 if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
7937 kvm_x86_ops->enable_irq_window(vcpu);
7938 WARN_ON(vcpu->arch.exception.pending);
7939 }
7940
7941 if (kvm_lapic_enabled(vcpu)) {
7942 update_cr8_intercept(vcpu);
7943 kvm_lapic_sync_to_vapic(vcpu);
7944 }
7945 }
7946
7947 r = kvm_mmu_reload(vcpu);
7948 if (unlikely(r)) {
7949 goto cancel_injection;
7950 }
7951
7952 preempt_disable();
7953
7954 kvm_x86_ops->prepare_guest_switch(vcpu);
7955
	/*
	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt IPIs
	 * that arrive from here on are then delivered after guest entry,
	 * where they result in virtual interrupt delivery instead of being
	 * lost.
	 */
7961 local_irq_disable();
7962 vcpu->mode = IN_GUEST_MODE;
7963
7964 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
7965
	/*
	 * 1) We should set ->mode before checking ->requests.  Please see
	 * the comment in kvm_vcpu_exiting_guest_mode().
	 *
	 * 2) For APICv, we should set ->mode before checking PID.ON. This
	 * pairs with the memory barrier implicit in pi_test_and_set_on
	 * (see vmx_deliver_posted_interrupt).
	 *
	 * 3) This also orders the write to mode from any reads to the page
	 * tables done while the VCPU is running.  Please see the comment
	 * in kvm_flush_remote_tlbs.
	 */
7978 smp_mb__after_srcu_read_unlock();
7979
	/*
	 * This handles the case where a posted interrupt was
	 * notified with kvm_vcpu_kick.
	 */
7984 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
7985 kvm_x86_ops->sync_pir_to_irr(vcpu);
7986
7987 if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
7988 || need_resched() || signal_pending(current)) {
7989 vcpu->mode = OUTSIDE_GUEST_MODE;
7990 smp_wmb();
7991 local_irq_enable();
7992 preempt_enable();
7993 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
7994 r = 1;
7995 goto cancel_injection;
7996 }
7997
7998 if (req_immediate_exit) {
7999 kvm_make_request(KVM_REQ_EVENT, vcpu);
8000 kvm_x86_ops->request_immediate_exit(vcpu);
8001 }
8002
8003 trace_kvm_entry(vcpu->vcpu_id);
8004 guest_enter_irqoff();
8005
	/* The preempt notifier should have taken care of the FPU already.  */
8007 WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
8008
8009 if (unlikely(vcpu->arch.switch_db_regs)) {
8010 set_debugreg(0, 7);
8011 set_debugreg(vcpu->arch.eff_db[0], 0);
8012 set_debugreg(vcpu->arch.eff_db[1], 1);
8013 set_debugreg(vcpu->arch.eff_db[2], 2);
8014 set_debugreg(vcpu->arch.eff_db[3], 3);
8015 set_debugreg(vcpu->arch.dr6, 6);
8016 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8017 }
8018
8019 kvm_x86_ops->run(vcpu);
8020
	/*
	 * Sync the guest's dirty debug registers here, before the host
	 * values are restored and before the exit is handled, so that a
	 * DR-access exit handler reads the values the guest is actually
	 * using.
	 */
8027 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
8028 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
8029 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
8030 kvm_update_dr0123(vcpu);
8031 kvm_update_dr6(vcpu);
8032 kvm_update_dr7(vcpu);
8033 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8034 }
8035
	/*
	 * If the guest has used debug registers, at least dr7
	 * will be disabled while returning to the host.
	 * If we don't have active breakpoints in the host, we don't
	 * care about the messed up debug address registers. But if
	 * we have some of them active, restore the old state.
	 */
8043 if (hw_breakpoint_active())
8044 hw_breakpoint_restore();
8045
8046 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
8047
8048 vcpu->mode = OUTSIDE_GUEST_MODE;
8049 smp_wmb();
8050
8051 kvm_x86_ops->handle_exit_irqoff(vcpu);
8052
	/*
	 * Consume any pending interrupts, including the possible source of
	 * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
	 * An instruction is required after local_irq_enable() to fully unblock
	 * interrupts on processors that implement an interrupt shadow, the
	 * stat.exits increment will do nicely.
	 */
8060 kvm_before_interrupt(vcpu);
8061 local_irq_enable();
8062 ++vcpu->stat.exits;
8063 local_irq_disable();
8064 kvm_after_interrupt(vcpu);
8065
8066 guest_exit_irqoff();
8067 if (lapic_in_kernel(vcpu)) {
8068 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
8069 if (delta != S64_MIN) {
8070 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
8071 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
8072 }
8073 }
8074
8075 local_irq_enable();
8076 preempt_enable();
8077
8078 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8079
	/*
	 * Profile KVM exit RIPs:
	 */
8083 if (unlikely(prof_on == KVM_PROFILING)) {
8084 unsigned long rip = kvm_rip_read(vcpu);
8085 profile_hit(KVM_PROFILING, (void *)rip);
8086 }
8087
8088 if (unlikely(vcpu->arch.tsc_always_catchup))
8089 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8090
8091 if (vcpu->arch.apic_attention)
8092 kvm_lapic_sync_from_vapic(vcpu);
8093
8094 vcpu->arch.gpa_available = false;
8095 r = kvm_x86_ops->handle_exit(vcpu);
8096 return r;
8097
8098cancel_injection:
8099 kvm_x86_ops->cancel_injection(vcpu);
8100 if (unlikely(vcpu->arch.apic_attention))
8101 kvm_lapic_sync_from_vapic(vcpu);
8102out:
8103 return r;
8104}
8105
8106static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
8107{
8108 if (!kvm_arch_vcpu_runnable(vcpu) &&
8109 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
8110 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8111 kvm_vcpu_block(vcpu);
8112 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8113
8114 if (kvm_x86_ops->post_block)
8115 kvm_x86_ops->post_block(vcpu);
8116
8117 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
8118 return 1;
8119 }
8120
8121 kvm_apic_accept_events(vcpu);
8122 switch(vcpu->arch.mp_state) {
8123 case KVM_MP_STATE_HALTED:
8124 vcpu->arch.pv.pv_unhalted = false;
8125 vcpu->arch.mp_state =
8126 KVM_MP_STATE_RUNNABLE;
		/* fall through */
8128 case KVM_MP_STATE_RUNNABLE:
8129 vcpu->arch.apf.halted = false;
8130 break;
8131 case KVM_MP_STATE_INIT_RECEIVED:
8132 break;
8133 default:
8134 return -EINTR;
8136 }
8137 return 1;
8138}
8139
8140static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
8141{
8142 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
8143 kvm_x86_ops->check_nested_events(vcpu, false);
8144
8145 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
8146 !vcpu->arch.apf.halted);
8147}
8148
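/*
 * The main vCPU loop: alternate between running the guest and blocking
 * until an exit to userspace or a signal is required.
 */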
8149static int vcpu_run(struct kvm_vcpu *vcpu)
8150{
8151 int r;
8152 struct kvm *kvm = vcpu->kvm;
8153
8154 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8155 vcpu->arch.l1tf_flush_l1d = true;
8156
8157 for (;;) {
8158 if (kvm_vcpu_running(vcpu)) {
8159 r = vcpu_enter_guest(vcpu);
8160 } else {
8161 r = vcpu_block(kvm, vcpu);
8162 }
8163
8164 if (r <= 0)
8165 break;
8166
8167 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
8168 if (kvm_cpu_has_pending_timer(vcpu))
8169 kvm_inject_pending_timer_irqs(vcpu);
8170
8171 if (dm_request_for_irq_injection(vcpu) &&
8172 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
8173 r = 0;
8174 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
8175 ++vcpu->stat.request_irq_exits;
8176 break;
8177 }
8178
8179 kvm_check_async_pf_completion(vcpu);
8180
8181 if (signal_pending(current)) {
8182 r = -EINTR;
8183 vcpu->run->exit_reason = KVM_EXIT_INTR;
8184 ++vcpu->stat.signal_exits;
8185 break;
8186 }
8187 if (need_resched()) {
8188 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8189 cond_resched();
8190 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8191 }
8192 }
8193
8194 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8195
8196 return r;
8197}
8198
8199static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
8200{
8201 int r;
8202 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8203 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
8204 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8205 if (r != EMULATE_DONE)
8206 return 0;
8207 return 1;
8208}
8209
8210static int complete_emulated_pio(struct kvm_vcpu *vcpu)
8211{
8212 BUG_ON(!vcpu->arch.pio.count);
8213
8214 return complete_emulated_io(vcpu);
8215}
8216
/*
 * Implements the following, as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit
 */
8235static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
8236{
8237 struct kvm_run *run = vcpu->run;
8238 struct kvm_mmio_fragment *frag;
8239 unsigned len;
8240
8241 BUG_ON(!vcpu->mmio_needed);
8242
	/* Complete previous fragment */
8244 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
8245 len = min(8u, frag->len);
8246 if (!vcpu->mmio_is_write)
8247 memcpy(frag->data, run->mmio.data, len);
8248
8249 if (frag->len <= 8) {
		/* Switch to the next fragment. */
8251 frag++;
8252 vcpu->mmio_cur_fragment++;
8253 } else {
		/* Go forward to the next mmio piece. */
8255 frag->data += len;
8256 frag->gpa += len;
8257 frag->len -= len;
8258 }
8259
8260 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
8261 vcpu->mmio_needed = 0;
8262
		/* FIXME: return into emulator if single-stepping.  */
8264 if (vcpu->mmio_is_write)
8265 return 1;
8266 vcpu->mmio_read_completed = 1;
8267 return complete_emulated_io(vcpu);
8268 }
8269
8270 run->exit_reason = KVM_EXIT_MMIO;
8271 run->mmio.phys_addr = frag->gpa;
8272 if (vcpu->mmio_is_write)
8273 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
8274 run->mmio.len = min(8u, frag->len);
8275 run->mmio.is_write = vcpu->mmio_is_write;
8276 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8277 return 0;
8278}
8279
/* Swap (qemu) user FPU context for the guest FPU context. */
8281static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
8282{
8283 fpregs_lock();
8284
8285 copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
	/* PKRU is separately restored in kvm_x86_ops->run.  */
8287 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
8288 ~XFEATURE_MASK_PKRU);
8289
8290 fpregs_mark_activate();
8291 fpregs_unlock();
8292
8293 trace_kvm_fpu(1);
8294}
8295
/* When vcpu_run ends, restore user space FPU context. */
8297static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
8298{
8299 fpregs_lock();
8300
8301 copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
8302 copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
8303
8304 fpregs_mark_activate();
8305 fpregs_unlock();
8306
8307 ++vcpu->stat.fpu_reload;
8308 trace_kvm_fpu(0);
8309}
8310
8311int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
8312{
8313 int r;
8314
8315 vcpu_load(vcpu);
8316 kvm_sigset_activate(vcpu);
8317 kvm_load_guest_fpu(vcpu);
8318
8319 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
8320 if (kvm_run->immediate_exit) {
8321 r = -EINTR;
8322 goto out;
8323 }
8324 kvm_vcpu_block(vcpu);
8325 kvm_apic_accept_events(vcpu);
8326 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
8327 r = -EAGAIN;
8328 if (signal_pending(current)) {
8329 r = -EINTR;
8330 vcpu->run->exit_reason = KVM_EXIT_INTR;
8331 ++vcpu->stat.signal_exits;
8332 }
8333 goto out;
8334 }
8335
8336 if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
8337 r = -EINVAL;
8338 goto out;
8339 }
8340
8341 if (vcpu->run->kvm_dirty_regs) {
8342 r = sync_regs(vcpu);
8343 if (r != 0)
8344 goto out;
8345 }
8346
	/* re-sync apic's tpr */
8348 if (!lapic_in_kernel(vcpu)) {
8349 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
8350 r = -EINVAL;
8351 goto out;
8352 }
8353 }
8354
8355 if (unlikely(vcpu->arch.complete_userspace_io)) {
8356 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
8357 vcpu->arch.complete_userspace_io = NULL;
8358 r = cui(vcpu);
8359 if (r <= 0)
8360 goto out;
8361 } else
8362 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
8363
8364 if (kvm_run->immediate_exit)
8365 r = -EINTR;
8366 else
8367 r = vcpu_run(vcpu);
8368
8369out:
8370 kvm_put_guest_fpu(vcpu);
8371 if (vcpu->run->kvm_valid_regs)
8372 store_regs(vcpu);
8373 post_kvm_run_save(vcpu);
8374 kvm_sigset_deactivate(vcpu);
8375
8376 vcpu_put(vcpu);
8377 return r;
8378}
8379
8380static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8381{
8382 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * We are here if userspace calls get_regs() in the middle of
		 * instruction emulation. Registers state needs to be copied
		 * back from emulation context to vcpu. Userspace shouldn't do
		 * that usually, but some bad designed PV devices (vmware
		 * backdoor interface) need this to work
		 */
8390 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
8391 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8392 }
8393 regs->rax = kvm_rax_read(vcpu);
8394 regs->rbx = kvm_rbx_read(vcpu);
8395 regs->rcx = kvm_rcx_read(vcpu);
8396 regs->rdx = kvm_rdx_read(vcpu);
8397 regs->rsi = kvm_rsi_read(vcpu);
8398 regs->rdi = kvm_rdi_read(vcpu);
8399 regs->rsp = kvm_rsp_read(vcpu);
8400 regs->rbp = kvm_rbp_read(vcpu);
8401#ifdef CONFIG_X86_64
8402 regs->r8 = kvm_r8_read(vcpu);
8403 regs->r9 = kvm_r9_read(vcpu);
8404 regs->r10 = kvm_r10_read(vcpu);
8405 regs->r11 = kvm_r11_read(vcpu);
8406 regs->r12 = kvm_r12_read(vcpu);
8407 regs->r13 = kvm_r13_read(vcpu);
8408 regs->r14 = kvm_r14_read(vcpu);
8409 regs->r15 = kvm_r15_read(vcpu);
8410#endif
8411
8412 regs->rip = kvm_rip_read(vcpu);
8413 regs->rflags = kvm_get_rflags(vcpu);
8414}
8415
8416int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8417{
8418 vcpu_load(vcpu);
8419 __get_regs(vcpu, regs);
8420 vcpu_put(vcpu);
8421 return 0;
8422}
8423
8424static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8425{
8426 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
8427 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8428
8429 kvm_rax_write(vcpu, regs->rax);
8430 kvm_rbx_write(vcpu, regs->rbx);
8431 kvm_rcx_write(vcpu, regs->rcx);
8432 kvm_rdx_write(vcpu, regs->rdx);
8433 kvm_rsi_write(vcpu, regs->rsi);
8434 kvm_rdi_write(vcpu, regs->rdi);
8435 kvm_rsp_write(vcpu, regs->rsp);
8436 kvm_rbp_write(vcpu, regs->rbp);
8437#ifdef CONFIG_X86_64
8438 kvm_r8_write(vcpu, regs->r8);
8439 kvm_r9_write(vcpu, regs->r9);
8440 kvm_r10_write(vcpu, regs->r10);
8441 kvm_r11_write(vcpu, regs->r11);
8442 kvm_r12_write(vcpu, regs->r12);
8443 kvm_r13_write(vcpu, regs->r13);
8444 kvm_r14_write(vcpu, regs->r14);
8445 kvm_r15_write(vcpu, regs->r15);
8446#endif
8447
8448 kvm_rip_write(vcpu, regs->rip);
8449 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
8450
8451 vcpu->arch.exception.pending = false;
8452
8453 kvm_make_request(KVM_REQ_EVENT, vcpu);
8454}
8455
8456int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8457{
8458 vcpu_load(vcpu);
8459 __set_regs(vcpu, regs);
8460 vcpu_put(vcpu);
8461 return 0;
8462}
8463
8464void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
8465{
8466 struct kvm_segment cs;
8467
8468 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
8469 *db = cs.db;
8470 *l = cs.l;
8471}
8472EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
8473
8474static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8475{
8476 struct desc_ptr dt;
8477
8478 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8479 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8480 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8481 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8482 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8483 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8484
8485 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8486 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8487
8488 kvm_x86_ops->get_idt(vcpu, &dt);
8489 sregs->idt.limit = dt.size;
8490 sregs->idt.base = dt.address;
8491 kvm_x86_ops->get_gdt(vcpu, &dt);
8492 sregs->gdt.limit = dt.size;
8493 sregs->gdt.base = dt.address;
8494
8495 sregs->cr0 = kvm_read_cr0(vcpu);
8496 sregs->cr2 = vcpu->arch.cr2;
8497 sregs->cr3 = kvm_read_cr3(vcpu);
8498 sregs->cr4 = kvm_read_cr4(vcpu);
8499 sregs->cr8 = kvm_get_cr8(vcpu);
8500 sregs->efer = vcpu->arch.efer;
8501 sregs->apic_base = kvm_get_apic_base(vcpu);
8502
8503 memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
8504
8505 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
8506 set_bit(vcpu->arch.interrupt.nr,
8507 (unsigned long *)sregs->interrupt_bitmap);
8508}
8509
8510int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
8511 struct kvm_sregs *sregs)
8512{
8513 vcpu_load(vcpu);
8514 __get_sregs(vcpu, sregs);
8515 vcpu_put(vcpu);
8516 return 0;
8517}
8518
8519int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
8520 struct kvm_mp_state *mp_state)
8521{
8522 vcpu_load(vcpu);
8523
8524 kvm_apic_accept_events(vcpu);
8525 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
8526 vcpu->arch.pv.pv_unhalted)
8527 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
8528 else
8529 mp_state->mp_state = vcpu->arch.mp_state;
8530
8531 vcpu_put(vcpu);
8532 return 0;
8533}
8534
8535int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
8536 struct kvm_mp_state *mp_state)
8537{
8538 int ret = -EINVAL;
8539
8540 vcpu_load(vcpu);
8541
8542 if (!lapic_in_kernel(vcpu) &&
8543 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
8544 goto out;
8545
	/* INITs are latched while in SMM */
8547 if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
8548 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
8549 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
8550 goto out;
8551
8552 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
8553 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
8554 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
8555 } else
8556 vcpu->arch.mp_state = mp_state->mp_state;
8557 kvm_make_request(KVM_REQ_EVENT, vcpu);
8558
8559 ret = 0;
8560out:
8561 vcpu_put(vcpu);
8562 return ret;
8563}
8564
8565int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
8566 int reason, bool has_error_code, u32 error_code)
8567{
8568 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
8569 int ret;
8570
8571 init_emulate_ctxt(vcpu);
8572
8573 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
8574 has_error_code, error_code);
8575
8576 if (ret)
8577 return EMULATE_FAIL;
8578
8579 kvm_rip_write(vcpu, ctxt->eip);
8580 kvm_set_rflags(vcpu, ctxt->eflags);
8581 kvm_make_request(KVM_REQ_EVENT, vcpu);
8582 return EMULATE_DONE;
8583}
8584EXPORT_SYMBOL_GPL(kvm_task_switch);
8585
8586static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8587{
8588 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
8589 (sregs->cr4 & X86_CR4_OSXSAVE))
8590 return -EINVAL;
8591
8592 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
		/*
		 * When EFER.LME and CR0.PG are set, the processor is in
		 * 64-bit mode (though maybe in a 32-bit code segment).
		 * CR4.PAE and EFER.LMA must be set.
		 */
8598 if (!(sregs->cr4 & X86_CR4_PAE)
8599 || !(sregs->efer & EFER_LMA))
8600 return -EINVAL;
8601 } else {
		/*
		 * Not in 64-bit mode: EFER.LMA is clear and the code
		 * segment cannot be 64-bit.
		 */
8606 if (sregs->efer & EFER_LMA || sregs->cs.l)
8607 return -EINVAL;
8608 }
8609
8610 return 0;
8611}
8612
8613static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8614{
8615 struct msr_data apic_base_msr;
8616 int mmu_reset_needed = 0;
8617 int cpuid_update_needed = 0;
8618 int pending_vec, max_bits, idx;
8619 struct desc_ptr dt;
8620 int ret = -EINVAL;
8621
8622 if (kvm_valid_sregs(vcpu, sregs))
8623 goto out;
8624
8625 apic_base_msr.data = sregs->apic_base;
8626 apic_base_msr.host_initiated = true;
8627 if (kvm_set_apic_base(vcpu, &apic_base_msr))
8628 goto out;
8629
8630 dt.size = sregs->idt.limit;
8631 dt.address = sregs->idt.base;
8632 kvm_x86_ops->set_idt(vcpu, &dt);
8633 dt.size = sregs->gdt.limit;
8634 dt.address = sregs->gdt.base;
8635 kvm_x86_ops->set_gdt(vcpu, &dt);
8636
8637 vcpu->arch.cr2 = sregs->cr2;
8638 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
8639 vcpu->arch.cr3 = sregs->cr3;
8640 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
8641
8642 kvm_set_cr8(vcpu, sregs->cr8);
8643
8644 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
8645 kvm_x86_ops->set_efer(vcpu, sregs->efer);
8646
8647 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
8648 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
8649 vcpu->arch.cr0 = sregs->cr0;
8650
8651 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
8652 cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
8653 (X86_CR4_OSXSAVE | X86_CR4_PKE));
8654 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
8655 if (cpuid_update_needed)
8656 kvm_update_cpuid(vcpu);
8657
8658 idx = srcu_read_lock(&vcpu->kvm->srcu);
8659 if (is_pae_paging(vcpu)) {
8660 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
8661 mmu_reset_needed = 1;
8662 }
8663 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8664
8665 if (mmu_reset_needed)
8666 kvm_mmu_reset_context(vcpu);
8667
8668 max_bits = KVM_NR_INTERRUPTS;
8669 pending_vec = find_first_bit(
8670 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
8671 if (pending_vec < max_bits) {
8672 kvm_queue_interrupt(vcpu, pending_vec, false);
8673 pr_debug("Set back pending irq %d\n", pending_vec);
8674 }
8675
8676 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8677 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8678 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8679 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8680 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8681 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8682
8683 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8684 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8685
8686 update_cr8_intercept(vcpu);
8687
	/* Older userspace won't unhalt the vcpu on reset. */
8689 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
8690 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
8691 !is_protmode(vcpu))
8692 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8693
8694 kvm_make_request(KVM_REQ_EVENT, vcpu);
8695
8696 ret = 0;
8697out:
8698 return ret;
8699}
8700
8701int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
8702 struct kvm_sregs *sregs)
8703{
8704 int ret;
8705
8706 vcpu_load(vcpu);
8707 ret = __set_sregs(vcpu, sregs);
8708 vcpu_put(vcpu);
8709 return ret;
8710}
8711
8712int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
8713 struct kvm_guest_debug *dbg)
8714{
8715 unsigned long rflags;
8716 int i, r;
8717
8718 vcpu_load(vcpu);
8719
8720 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
8721 r = -EBUSY;
8722 if (vcpu->arch.exception.pending)
8723 goto out;
8724 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
8725 kvm_queue_exception(vcpu, DB_VECTOR);
8726 else
8727 kvm_queue_exception(vcpu, BP_VECTOR);
8728 }
8729
	/*
	 * Read rflags as long as potentially injected trace flags are still
	 * filtered out.
	 */
8734 rflags = kvm_get_rflags(vcpu);
8735
8736 vcpu->guest_debug = dbg->control;
8737 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
8738 vcpu->guest_debug = 0;
8739
8740 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
8741 for (i = 0; i < KVM_NR_DB_REGS; ++i)
8742 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
8743 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
8744 } else {
8745 for (i = 0; i < KVM_NR_DB_REGS; i++)
8746 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
8747 }
8748 kvm_update_dr7(vcpu);
8749
8750 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
8751 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
8752 get_segment_base(vcpu, VCPU_SREG_CS);
8753
	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags.
	 */
8758 kvm_set_rflags(vcpu, rflags);
8759
8760 kvm_x86_ops->update_bp_intercept(vcpu);
8761
8762 r = 0;
8763
8764out:
8765 vcpu_put(vcpu);
8766 return r;
8767}
8768
/*
 * Translate a guest virtual address to a guest physical address.
 */
8772int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
8773 struct kvm_translation *tr)
8774{
8775 unsigned long vaddr = tr->linear_address;
8776 gpa_t gpa;
8777 int idx;
8778
8779 vcpu_load(vcpu);
8780
8781 idx = srcu_read_lock(&vcpu->kvm->srcu);
8782 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
8783 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8784 tr->physical_address = gpa;
8785 tr->valid = gpa != UNMAPPED_GVA;
8786 tr->writeable = 1;
8787 tr->usermode = 0;
8788
8789 vcpu_put(vcpu);
8790 return 0;
8791}
8792
8793int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
8794{
8795 struct fxregs_state *fxsave;
8796
8797 vcpu_load(vcpu);
8798
8799 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
8800 memcpy(fpu->fpr, fxsave->st_space, 128);
8801 fpu->fcw = fxsave->cwd;
8802 fpu->fsw = fxsave->swd;
8803 fpu->ftwx = fxsave->twd;
8804 fpu->last_opcode = fxsave->fop;
8805 fpu->last_ip = fxsave->rip;
8806 fpu->last_dp = fxsave->rdp;
8807 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
8808
8809 vcpu_put(vcpu);
8810 return 0;
8811}
8812
8813int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
8814{
8815 struct fxregs_state *fxsave;
8816
8817 vcpu_load(vcpu);
8818
8819 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
8820
8821 memcpy(fxsave->st_space, fpu->fpr, 128);
8822 fxsave->cwd = fpu->fcw;
8823 fxsave->swd = fpu->fsw;
8824 fxsave->twd = fpu->ftwx;
8825 fxsave->fop = fpu->last_opcode;
8826 fxsave->rip = fpu->last_ip;
8827 fxsave->rdp = fpu->last_dp;
8828 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
8829
8830 vcpu_put(vcpu);
8831 return 0;
8832}
8833
8834static void store_regs(struct kvm_vcpu *vcpu)
8835{
8836 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
8837
8838 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
8839 __get_regs(vcpu, &vcpu->run->s.regs.regs);
8840
8841 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
8842 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
8843
8844 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
8845 kvm_vcpu_ioctl_x86_get_vcpu_events(
8846 vcpu, &vcpu->run->s.regs.events);
8847}
8848
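/* Copy the fields userspace marked dirty from the kvm_run area into the vCPU. */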
8849static int sync_regs(struct kvm_vcpu *vcpu)
8850{
8851 if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
8852 return -EINVAL;
8853
8854 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
8855 __set_regs(vcpu, &vcpu->run->s.regs.regs);
8856 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
8857 }
8858 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
8859 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
8860 return -EINVAL;
8861 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
8862 }
8863 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
8864 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
8865 vcpu, &vcpu->run->s.regs.events))
8866 return -EINVAL;
8867 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
8868 }
8869
8870 return 0;
8871}
8872
8873static void fx_init(struct kvm_vcpu *vcpu)
8874{
8875 fpstate_init(&vcpu->arch.guest_fpu->state);
8876 if (boot_cpu_has(X86_FEATURE_XSAVES))
8877 vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
8878 host_xcr0 | XSTATE_COMPACTION_ENABLED;
8879
	/*
	 * Ensure guest xcr0 is valid for loading
	 */
8883 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
8884
8885 vcpu->arch.cr0 |= X86_CR0_ET;
8886}
8887
8888void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
8889{
8890 void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
8891
8892 kvmclock_reset(vcpu);
8893
8894 kvm_x86_ops->vcpu_free(vcpu);
8895 free_cpumask_var(wbinvd_dirty_mask);
8896}
8897
8898struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
8899 unsigned int id)
8900{
8901 struct kvm_vcpu *vcpu;
8902
8903 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
8904 printk_once(KERN_WARNING
8905 "kvm: SMP vm created on host with unstable TSC; "
8906 "guest TSC will not be reliable\n");
8907
8908 vcpu = kvm_x86_ops->vcpu_create(kvm, id);
8909
8910 return vcpu;
8911}
8912
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
	vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
	kvm_vcpu_mtrr_init(vcpu);
	vcpu_load(vcpu);
	kvm_vcpu_reset(vcpu, false);
	kvm_init_mmu(vcpu, false);
	vcpu_put(vcpu);
	return 0;
}

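/*
 * Runs once the vcpu fd has been installed.  Writing a zero TSC here
 * goes through the normal kvm_write_tsc() path, which lets the TSC
 * synchronization logic line the new vcpu up with its siblings.
 */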
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	struct msr_data msr;
	struct kvm *kvm = vcpu->kvm;

	kvm_hv_vcpu_postcreate(vcpu);

	if (mutex_lock_killable(&vcpu->mutex))
		return;
	vcpu_load(vcpu);
	msr.data = 0x0;
	msr.index = MSR_IA32_TSC;
	msr.host_initiated = true;
	kvm_write_tsc(vcpu, &msr);
	vcpu_put(vcpu);

	/* poll control enabled by default */
	vcpu->arch.msr_kvm_poll_control = 1;

	mutex_unlock(&vcpu->mutex);

	if (!kvmclock_periodic_sync)
		return;

	schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
					KVMCLOCK_SYNC_PERIOD);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	vcpu->arch.apf.msr_val = 0;

	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);

	kvm_x86_ops->vcpu_free(vcpu);
}

void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	kvm_lapic_reset(vcpu, init_event);

	vcpu->arch.hflags = 0;

	vcpu->arch.smi_pending = 0;
	vcpu->arch.smi_count = 0;
	atomic_set(&vcpu->arch.nmi_queued, 0);
	vcpu->arch.nmi_pending = 0;
	vcpu->arch.nmi_injected = false;
	kvm_clear_interrupt_queue(vcpu);
	kvm_clear_exception_queue(vcpu);
	vcpu->arch.exception.pending = false;

	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
	kvm_update_dr0123(vcpu);
	vcpu->arch.dr6 = DR6_INIT;
	kvm_update_dr6(vcpu);
	vcpu->arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(vcpu);

	vcpu->arch.cr2 = 0;

	kvm_make_request(KVM_REQ_EVENT, vcpu);
	vcpu->arch.apf.msr_val = 0;
	vcpu->arch.st.msr_val = 0;

	kvmclock_reset(vcpu);

	kvm_clear_async_pf_completion_queue(vcpu);
	kvm_async_pf_hash_reset(vcpu);
	vcpu->arch.apf.halted = false;

	if (kvm_mpx_supported()) {
		void *mpx_state_buffer;

		/*
		 * On INIT the guest FPU may currently be loaded on the CPU;
		 * unload it so the in-memory xsave image can be modified,
		 * then reload it once BNDREGS/BNDCSR have been cleared.
		 */
		if (init_event)
			kvm_put_guest_fpu(vcpu);
		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
					XFEATURE_BNDREGS);
		if (mpx_state_buffer)
			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
					XFEATURE_BNDCSR);
		if (mpx_state_buffer)
			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
		if (init_event)
			kvm_load_guest_fpu(vcpu);
	}

	if (!init_event) {
		kvm_pmu_reset(vcpu);
		vcpu->arch.smbase = 0x30000;

		vcpu->arch.msr_misc_features_enables = 0;

		vcpu->arch.xcr0 = XFEATURE_MASK_FP;
	}

	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
	vcpu->arch.regs_avail = ~0;
	vcpu->arch.regs_dirty = ~0;

	vcpu->arch.ia32_xss = 0;

	kvm_x86_ops->vcpu_reset(vcpu, init_event);
}

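/*
 * A SIPI with the given vector starts the target AP in real mode at
 * CS:IP = (vector << 8):0, i.e. at physical address vector << 12.
 */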
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct kvm_segment cs;

	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
	cs.selector = vector << 8;
	cs.base = vector << 12;
	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_rip_write(vcpu, 0);
}

int kvm_arch_hardware_enable(void)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	int ret;
	u64 local_tsc;
	u64 max_tsc = 0;
	bool stable, backwards_tsc = false;

	kvm_shared_msr_cpu_online();
	ret = kvm_x86_ops->hardware_enable();
	if (ret != 0)
		return ret;

	local_tsc = rdtsc();
	stable = !kvm_check_tsc_unstable();
	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (!stable && vcpu->cpu == smp_processor_id())
				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
				backwards_tsc = true;
				if (vcpu->arch.last_host_tsc > max_tsc)
					max_tsc = vcpu->arch.last_host_tsc;
			}
		}
	}

	/*
	 * Sometimes, even reliable TSCs go backwards.  This happens on
	 * platforms that reset the TSC during suspend or hibernate but
	 * otherwise keep it synchronized across CPUs.  Compensate by
	 * detecting that the host resumed with a TSC value lower than
	 * some vcpu last observed and adding the missing cycles back via
	 * tsc_offset_adjustment.  Hosts with an unstable TSC are instead
	 * handled by the KVM_REQ_CLOCK_UPDATE request made above, which
	 * recomputes the guest clock from scratch.
	 */
	if (backwards_tsc) {
		u64 delta_cyc = max_tsc - local_tsc;
		list_for_each_entry(kvm, &vm_list, vm_list) {
			kvm->arch.backwards_tsc_observed = true;
			kvm_for_each_vcpu(i, vcpu, kvm) {
				vcpu->arch.tsc_offset_adjustment += delta_cyc;
				vcpu->arch.last_host_tsc = local_tsc;
				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
			}

			/*
			 * Zero the tracked TSC write so the next guest TSC
			 * write starts a fresh synchronization cycle instead
			 * of "matching" a stale pre-suspend value.
			 */
			kvm->arch.last_tsc_nsec = 0;
			kvm->arch.last_tsc_write = 0;
		}
	}
	return 0;
}

void kvm_arch_hardware_disable(void)
{
	kvm_x86_ops->hardware_disable();
	drop_user_return_notifiers();
}

int kvm_arch_hardware_setup(void)
{
	int r;

	r = kvm_x86_ops->hardware_setup();
	if (r != 0)
		return r;

	if (kvm_has_tsc_control) {
		/*
		 * Make sure userspace can only configure tsc_khz values
		 * that fit into a signed 32-bit integer: clamp the scaled
		 * maximum so later arithmetic on user_tsc_khz cannot
		 * overflow.
		 */
		u64 max = min(0x7fffffffULL,
			      __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
		kvm_max_guest_tsc_khz = max;

		kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
	}

	kvm_init_msr_list();
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	kvm_x86_ops->hardware_unsetup();
}

int kvm_arch_check_processor_compat(void)
{
	return kvm_x86_ops->check_processor_compatibility();
}

bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);

bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}

struct static_key kvm_no_apic_vcpu __read_mostly;
EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);

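/*
 * Architecture-specific vcpu construction: allocates the pio scratch
 * page, MMU, in-kernel LAPIC (when applicable), MCE banks, the wbinvd
 * dirty mask, and the guest FPU state, unwinding in reverse on failure.
 */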
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct page *page;
	int r;

	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	else
		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->arch.pio_data = page_address(page);

	kvm_set_tsc_khz(vcpu, max_tsc_khz);

	r = kvm_mmu_create(vcpu);
	if (r < 0)
		goto fail_free_pio_data;

	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
		if (r < 0)
			goto fail_mmu_destroy;
	} else
		static_key_slow_inc(&kvm_no_apic_vcpu);

	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
				       GFP_KERNEL_ACCOUNT);
	if (!vcpu->arch.mce_banks) {
		r = -ENOMEM;
		goto fail_free_lapic;
	}
	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
				GFP_KERNEL_ACCOUNT)) {
		r = -ENOMEM;
		goto fail_free_mce_banks;
	}

	fx_init(vcpu);

	vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;

	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);

	vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;

	kvm_async_pf_hash_reset(vcpu);
	kvm_pmu_init(vcpu);

	vcpu->arch.pending_external_vector = -1;
	vcpu->arch.preempted_in_kernel = false;

	kvm_hv_vcpu_init(vcpu);

	return 0;

fail_free_mce_banks:
	kfree(vcpu->arch.mce_banks);
fail_free_lapic:
	kvm_free_lapic(vcpu);
fail_mmu_destroy:
	kvm_mmu_destroy(vcpu);
fail_free_pio_data:
	free_page((unsigned long)vcpu->arch.pio_data);
fail:
	return r;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	int idx;

	kvm_hv_vcpu_uninit(vcpu);
	kvm_pmu_destroy(vcpu);
	kfree(vcpu->arch.mce_banks);
	kvm_free_lapic(vcpu);
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	kvm_mmu_destroy(vcpu);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	free_page((unsigned long)vcpu->arch.pio_data);
	if (!lapic_in_kernel(vcpu))
		static_key_slow_dec(&kvm_no_apic_vcpu);
}

void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
	vcpu->arch.l1tf_flush_l1d = true;
	kvm_x86_ops->sched_in(vcpu, cpu);
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	if (type)
		return -EINVAL;

	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
	atomic_set(&kvm->arch.noncoherent_dma_count, 0);

	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		&kvm->arch.irq_sources_bitmap);

	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
	mutex_init(&kvm->arch.apic_map_lock);
	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);

	kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
	pvclock_update_vm_gtod_copy(kvm);

	kvm->arch.guest_can_read_msr_platform_info = true;

	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);

	kvm_hv_init_vm(kvm);
	kvm_page_track_init(kvm);
	kvm_mmu_init_vm(kvm);

	if (kvm_x86_ops->vm_init)
		return kvm_x86_ops->vm_init(kvm);

	return 0;
}

static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	/*
	 * Unpin any mmu pages first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_unload_vcpu_mmu(vcpu);
	}
	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_free(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
	cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
	cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
	kvm_free_pit(kvm);
}

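/*
 * Create or delete one of KVM's internal memslots (APIC access page,
 * identity page table, TSS).  A size of 0 deletes the slot; the backing
 * memory is an anonymous mapping owned by the kernel, not by userspace.
 */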
int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
	int i, r;
	unsigned long hva;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *slot, old;

	/* Called with kvm->slots_lock held.  */
	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
		return -EINVAL;

	slot = id_to_memslot(slots, id);
	if (size) {
		if (slot->npages)
			return -EEXIST;

		/*
		 * MAP_SHARED to prevent internal slot pages from being moved
		 * by fork()/COW.
		 */
		hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
			      MAP_SHARED | MAP_ANONYMOUS, 0);
		if (IS_ERR((void *)hva))
			return PTR_ERR((void *)hva);
	} else {
		if (!slot->npages)
			return 0;

		hva = 0;
	}

	old = *slot;
	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		struct kvm_userspace_memory_region m;

		m.slot = id | (i << 16);
		m.flags = 0;
		m.guest_phys_addr = gpa;
		m.userspace_addr = hva;
		m.memory_size = size;
		r = __kvm_set_memory_region(kvm, &m);
		if (r < 0)
			return r;
	}

	if (!size)
		vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);

	return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);

int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
	int r;

	mutex_lock(&kvm->slots_lock);
	r = __x86_set_memory_region(kvm, id, gpa, size);
	mutex_unlock(&kvm->slots_lock);

	return r;
}
EXPORT_SYMBOL_GPL(x86_set_memory_region);

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	if (current->mm == kvm->mm) {
		/*
		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
		 * or fd copying.
		 */
		x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
		x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
		x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
	}
	if (kvm_x86_ops->vm_destroy)
		kvm_x86_ops->vm_destroy(kvm);
	kvm_pic_destroy(kvm);
	kvm_ioapic_destroy(kvm);
	kvm_free_vcpus(kvm);
	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
	kvm_mmu_uninit_vm(kvm);
	kvm_page_track_cleanup(kvm);
	kvm_hv_destroy_vm(kvm);
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
			kvfree(free->arch.rmap[i]);
			free->arch.rmap[i] = NULL;
		}
		if (i == 0)
			continue;

		if (!dont || free->arch.lpage_info[i - 1] !=
			     dont->arch.lpage_info[i - 1]) {
			kvfree(free->arch.lpage_info[i - 1]);
			free->arch.lpage_info[i - 1] = NULL;
		}
	}

	kvm_page_track_free_memslot(free, dont);
}

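/*
 * Allocate the per-slot rmap arrays and, for each large-page level, the
 * lpage_info array that records where huge mappings must be disallowed
 * (e.g. at unaligned slot edges).
 */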
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		struct kvm_lpage_info *linfo;
		unsigned long ugfn;
		int lpages;
		int level = i + 1;

		lpages = gfn_to_index(slot->base_gfn + npages - 1,
				      slot->base_gfn, level) + 1;

		slot->arch.rmap[i] =
			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
				 GFP_KERNEL_ACCOUNT);
		if (!slot->arch.rmap[i])
			goto out_free;
		if (i == 0)
			continue;

		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
		if (!linfo)
			goto out_free;

		slot->arch.lpage_info[i - 1] = linfo;

		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
			linfo[0].disallow_lpage = 1;
		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
			linfo[lpages - 1].disallow_lpage = 1;
		ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot
		 */
		if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
		    !kvm_largepages_enabled()) {
			unsigned long j;

			for (j = 0; j < lpages; ++j)
				linfo[j].disallow_lpage = 1;
		}
	}

	if (kvm_page_track_create_memslot(slot, npages))
		goto out_free;

	return 0;

out_free:
	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		kvfree(slot->arch.rmap[i]);
		slot->arch.rmap[i] = NULL;
		if (i == 0)
			continue;

		kvfree(slot->arch.lpage_info[i - 1]);
		slot->arch.lpage_info[i - 1] = NULL;
	}
	return -ENOMEM;
}

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
	/*
	 * memslots->generation has been incremented; the mmio generation
	 * may have reached its maximum value, in which case the mmio
	 * sptes need to be zapped.
	 */
	kvm_mmu_invalidate_mmio_sptes(kvm, gen);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	return 0;
}

static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
				     struct kvm_memory_slot *new)
{
	/* Still write protect RO slot */
	if (new->flags & KVM_MEM_READONLY) {
		kvm_mmu_slot_remove_write_access(kvm, new);
		return;
	}

	/*
	 * Prefer the vendor dirty-logging hooks when they exist.  With
	 * PML, slot_enable_log_dirty() write protects only large pages
	 * and clears the D-bit on small ones so that dirty GPAs are
	 * logged in the PML buffer, while slot_disable_log_dirty() sets
	 * the D-bit again so slots without dirty logging do not generate
	 * PML entries (or PML-full vmexits).  Without such hooks, fall
	 * back to write protecting the whole slot and logging dirty
	 * pages from the write-protection fault path.
	 */
	if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
		if (kvm_x86_ops->slot_enable_log_dirty)
			kvm_x86_ops->slot_enable_log_dirty(kvm, new);
		else
			kvm_mmu_slot_remove_write_access(kvm, new);
	} else {
		if (kvm_x86_ops->slot_disable_log_dirty)
			kvm_x86_ops->slot_disable_log_dirty(kvm, new);
	}
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	if (!kvm->arch.n_requested_mmu_pages)
		kvm_mmu_change_mmu_pages(kvm,
				kvm_mmu_calculate_default_mmu_pages(kvm));

	/*
	 * Dirty logging tracks sptes at 4k granularity, so large sptes
	 * have to be split while it is enabled.  If dirty logging has
	 * just been turned off (e.g. after live migration completed or
	 * failed), zap the small sptes that can be collapsed back into
	 * large ones; later page faults will recreate the large
	 * mappings.
	 */
	if ((change != KVM_MR_DELETE) &&
		(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
		!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
		kvm_mmu_zap_collapsible_sptes(kvm, new);

	/*
	 * Set up write protection and/or dirty logging for the new slot.
	 * For KVM_MR_DELETE there is nothing to do: the shadow pages of
	 * the old slot have already been zapped.
	 */
	if (change != KVM_MR_DELETE)
		kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_mmu_zap_all(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	kvm_page_track_flush_slot(kvm, slot);
}

static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	return (is_guest_mode(vcpu) &&
			kvm_x86_ops->guest_apic_has_interrupt &&
			kvm_x86_ops->guest_apic_has_interrupt(vcpu));
}

static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
	if (!list_empty_careful(&vcpu->async_pf.done))
		return true;

	if (kvm_apic_has_events(vcpu))
		return true;

	if (vcpu->arch.pv.pv_unhalted)
		return true;

	if (vcpu->arch.exception.pending)
		return true;

	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
	    (vcpu->arch.nmi_pending &&
	     kvm_x86_ops->nmi_allowed(vcpu)))
		return true;

	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
		return true;

	if (kvm_arch_interrupt_allowed(vcpu) &&
	    (kvm_cpu_has_interrupt(vcpu) ||
	    kvm_guest_apic_has_interrupt(vcpu)))
		return true;

	if (kvm_hv_has_stimer_pending(vcpu))
		return true;

	return false;
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}

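/*
 * Lighter-weight variant of kvm_arch_vcpu_runnable() used by directed
 * yield; it only consults state that is cheap and safe to read from
 * another vcpu's context.
 */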
bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
{
	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
		return true;

	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
	    kvm_test_request(KVM_REQ_SMI, vcpu) ||
	    kvm_test_request(KVM_REQ_EVENT, vcpu))
		return true;

	if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
		return true;

	return false;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.preempted_in_kernel;
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops->interrupt_allowed(vcpu);
}

unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
{
	if (is_64_bit_mode(vcpu))
		return kvm_rip_read(vcpu);
	return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
		     kvm_rip_read(vcpu));
}
EXPORT_SYMBOL_GPL(kvm_get_linear_rip);

bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
	return kvm_get_linear_rip(vcpu) == linear_rip;
}
EXPORT_SYMBOL_GPL(kvm_is_linear_rip);

unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
	unsigned long rflags;

	rflags = kvm_x86_ops->get_rflags(vcpu);
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		rflags &= ~X86_EFLAGS_TF;
	return rflags;
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);

static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
		rflags |= X86_EFLAGS_TF;
	kvm_x86_ops->set_rflags(vcpu, rflags);
}

void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	__kvm_set_rflags(vcpu, rflags);
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
	int r;

	if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
	      work->wakeup_all)
		return;

	r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		return;

	if (!vcpu->arch.mmu->direct_map &&
	      work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
		return;

	vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
}

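/*
 * The async page-fault gfn table is a small open-addressing hash with
 * linear probing; ~0 marks an empty slot, and deletion (below) re-homes
 * any entries the probe chain depends on rather than using tombstones.
 */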
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}

static inline u32 kvm_async_pf_next_probe(u32 key)
{
	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}

static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 key = kvm_async_pf_hash_fn(gfn);

	while (vcpu->arch.apf.gfns[key] != ~0)
		key = kvm_async_pf_next_probe(key);

	vcpu->arch.apf.gfns[key] = gfn;
}

static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	int i;
	u32 key = kvm_async_pf_hash_fn(gfn);

	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
		     (vcpu->arch.apf.gfns[key] != gfn &&
		      vcpu->arch.apf.gfns[key] != ~0); i++)
		key = kvm_async_pf_next_probe(key);

	return key;
}

bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}

static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 i, j, k;

	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
	while (true) {
		vcpu->arch.apf.gfns[i] = ~0;
		do {
			j = kvm_async_pf_next_probe(j);
			if (vcpu->arch.apf.gfns[j] == ~0)
				return;
			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k lies cyclically in ]i,j]
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
		i = j;
	}
}

static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
				      sizeof(val));
}

static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
{
	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
				     sizeof(u32));
}

static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
		return false;

	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		return false;

	return true;
}

bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
	if (unlikely(!lapic_in_kernel(vcpu) ||
		     kvm_event_needs_reinjection(vcpu) ||
		     vcpu->arch.exception.pending))
		return false;

	if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
		return false;

	/*
	 * If interrupts are off we cannot even use an artificial
	 * halt state.
	 */
	return kvm_x86_ops->interrupt_allowed(vcpu);
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);

	if (kvm_can_deliver_async_pf(vcpu) &&
	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;
		fault.nested_page_fault = false;
		fault.address = work->arch.token;
		fault.async_page_fault = true;
		kvm_inject_page_fault(vcpu, &fault);
	} else {
		/*
		 * It is not possible to deliver a paravirtualized
		 * asynchronous page fault, but putting the guest in an
		 * artificial halt state can be beneficial nevertheless:
		 * if an interrupt arrives, we can deliver it timely and
		 * perhaps the guest will schedule another process.  When
		 * the faulting instruction is retried, hopefully the page
		 * will be ready in the host.
		 */
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
	}
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	struct x86_exception fault;
	u32 val;

	if (work->wakeup_all)
		work->arch.token = ~0; /* broadcast wakeup */
	else
		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
	trace_kvm_async_pf_ready(work->arch.token, work->gva);

	if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
	    !apf_get_user(vcpu, &val)) {
		if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
		    vcpu->arch.exception.pending &&
		    vcpu->arch.exception.nr == PF_VECTOR &&
		    !apf_put_user(vcpu, 0)) {
			vcpu->arch.exception.injected = false;
			vcpu->arch.exception.pending = false;
			vcpu->arch.exception.nr = 0;
			vcpu->arch.exception.has_error_code = false;
			vcpu->arch.exception.error_code = 0;
			vcpu->arch.exception.has_payload = false;
			vcpu->arch.exception.payload = 0;
		} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
			fault.vector = PF_VECTOR;
			fault.error_code_valid = true;
			fault.error_code = 0;
			fault.nested_page_fault = false;
			fault.address = work->arch.token;
			fault.async_page_fault = true;
			kvm_inject_page_fault(vcpu, &fault);
		}
	}
	vcpu->arch.apf.halted = false;
	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
		return true;
	else
		return kvm_can_do_async_pf(vcpu);
}

void kvm_arch_start_assignment(struct kvm *kvm)
{
	atomic_inc(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);

void kvm_arch_end_assignment(struct kvm *kvm)
{
	atomic_dec(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);

bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);

void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
	atomic_inc(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);

void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
	atomic_dec(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);

bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);

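/*
 * IRQ bypass (posted interrupts): supported exactly when the vendor
 * module implements update_pi_irte, which reprograms the IRTE to post
 * a device interrupt directly to the guest (set) or back through the
 * host's remapped path (clear).
 */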
bool kvm_arch_has_irq_bypass(void)
{
	return kvm_x86_ops->update_pi_irte != NULL;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	irqfd->producer = prod;

	return kvm_x86_ops->update_pi_irte(irqfd->kvm,
					   prod->irq, irqfd->gsi, 1);
}

void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	int ret;
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	WARN_ON(irqfd->producer != prod);
	irqfd->producer = NULL;

	/*
	 * When the producer unregisters, switch the interrupt back to
	 * remapped mode so the existing paths keep working while the irq
	 * is masked or disabled, or while KVM no longer wants to receive
	 * it.
	 */
	ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
	if (ret)
		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
		       " failed: %d\n", irqfd->consumer.token, ret);
}

int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
				  uint32_t guest_irq, bool set)
{
	if (!kvm_x86_ops->update_pi_irte)
		return -EINVAL;

	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
}

bool kvm_vector_hashing_enabled(void)
{
	return vector_hashing;
}
EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);

bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);

EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);