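/*
 * Kernel-based Virtual Machine driver for Linux: architecture-neutral
 * x86 code shared by the hardware-specific (VMX/SVM) backends.
 */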
22#include <linux/kvm_host.h>
23#include "irq.h"
24#include "mmu.h"
25#include "i8254.h"
26#include "tss.h"
27#include "kvm_cache_regs.h"
28#include "x86.h"
29#include "cpuid.h"
30#include "pmu.h"
31#include "hyperv.h"
32
33#include <linux/clocksource.h>
34#include <linux/interrupt.h>
35#include <linux/kvm.h>
36#include <linux/fs.h>
37#include <linux/vmalloc.h>
38#include <linux/export.h>
39#include <linux/moduleparam.h>
40#include <linux/mman.h>
41#include <linux/highmem.h>
42#include <linux/iommu.h>
43#include <linux/intel-iommu.h>
44#include <linux/cpufreq.h>
45#include <linux/user-return-notifier.h>
46#include <linux/srcu.h>
47#include <linux/slab.h>
48#include <linux/perf_event.h>
49#include <linux/uaccess.h>
50#include <linux/hash.h>
51#include <linux/pci.h>
52#include <linux/timekeeper_internal.h>
53#include <linux/pvclock_gtod.h>
54#include <linux/kvm_irqfd.h>
55#include <linux/irqbypass.h>
56#include <linux/sched/stat.h>
57#include <linux/mem_encrypt.h>
58
59#include <trace/events/kvm.h>
60
61#include <asm/debugreg.h>
62#include <asm/msr.h>
63#include <asm/desc.h>
64#include <asm/mce.h>
65#include <linux/kernel_stat.h>
66#include <asm/fpu/internal.h>
67#include <asm/pvclock.h>
68#include <asm/div64.h>
69#include <asm/irq_remapping.h>
70#include <asm/mshyperv.h>
71#include <asm/hypervisor.h>
72
73#define CREATE_TRACE_POINTS
74#include "trace.h"
75
76#define MAX_IO_MSRS 256
77#define KVM_MAX_MCE_BANKS 32
78u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
79EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
80
81#define emul_to_vcpu(ctxt) \
82 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
83
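/*
 * EFER bits the guest may set.  SCE is always allowed (SYSCALL is emulated
 * by KVM); LME/LMA are additionally allowed on 64-bit hosts.  All other
 * bits are treated as reserved until explicitly enabled through
 * kvm_enable_efer_bits().
 */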
88#ifdef CONFIG_X86_64
89static
90u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
91#else
92static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
93#endif
94
95#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
96#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
97
98#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
99 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
100
101static void update_cr8_intercept(struct kvm_vcpu *vcpu);
102static void process_nmi(struct kvm_vcpu *vcpu);
103static void enter_smm(struct kvm_vcpu *vcpu);
104static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
105static void store_regs(struct kvm_vcpu *vcpu);
106static int sync_regs(struct kvm_vcpu *vcpu);
107
108struct kvm_x86_ops *kvm_x86_ops __read_mostly;
109EXPORT_SYMBOL_GPL(kvm_x86_ops);
110
111static bool __read_mostly ignore_msrs = 0;
112module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
113
114static bool __read_mostly report_ignored_msrs = true;
115module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
116
117unsigned int min_timer_period_us = 200;
118module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
119
120static bool __read_mostly kvmclock_periodic_sync = true;
121module_param(kvmclock_periodic_sync, bool, S_IRUGO);
122
123bool __read_mostly kvm_has_tsc_control;
124EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
125u32 __read_mostly kvm_max_guest_tsc_khz;
126EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
127u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
128EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
129u64 __read_mostly kvm_max_tsc_scaling_ratio;
130EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
131u64 __read_mostly kvm_default_tsc_scaling_ratio;
132EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
133
134
135static u32 __read_mostly tsc_tolerance_ppm = 250;
136module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
137
138
139unsigned int __read_mostly lapic_timer_advance_ns = 0;
140module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
141EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
142
143static bool __read_mostly vector_hashing = true;
144module_param(vector_hashing, bool, S_IRUGO);
145
146bool __read_mostly enable_vmware_backdoor = false;
147module_param(enable_vmware_backdoor, bool, S_IRUGO);
148EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
149
150static bool __read_mostly force_emulation_prefix = false;
151module_param(force_emulation_prefix, bool, S_IRUGO);
152
153#define KVM_NR_SHARED_MSRS 16
154
155struct kvm_shared_msrs_global {
156 int nr;
157 u32 msrs[KVM_NR_SHARED_MSRS];
158};
159
160struct kvm_shared_msrs {
161 struct user_return_notifier urn;
162 bool registered;
163 struct kvm_shared_msr_values {
164 u64 host;
165 u64 curr;
166 } values[KVM_NR_SHARED_MSRS];
167};
168
169static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
170static struct kvm_shared_msrs __percpu *shared_msrs;
171
172struct kvm_stats_debugfs_item debugfs_entries[] = {
173 { "pf_fixed", VCPU_STAT(pf_fixed) },
174 { "pf_guest", VCPU_STAT(pf_guest) },
175 { "tlb_flush", VCPU_STAT(tlb_flush) },
176 { "invlpg", VCPU_STAT(invlpg) },
177 { "exits", VCPU_STAT(exits) },
178 { "io_exits", VCPU_STAT(io_exits) },
179 { "mmio_exits", VCPU_STAT(mmio_exits) },
180 { "signal_exits", VCPU_STAT(signal_exits) },
181 { "irq_window", VCPU_STAT(irq_window_exits) },
182 { "nmi_window", VCPU_STAT(nmi_window_exits) },
183 { "halt_exits", VCPU_STAT(halt_exits) },
184 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
185 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
186 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
187 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
188 { "hypercalls", VCPU_STAT(hypercalls) },
189 { "request_irq", VCPU_STAT(request_irq_exits) },
190 { "irq_exits", VCPU_STAT(irq_exits) },
191 { "host_state_reload", VCPU_STAT(host_state_reload) },
192 { "fpu_reload", VCPU_STAT(fpu_reload) },
193 { "insn_emulation", VCPU_STAT(insn_emulation) },
194 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
195 { "irq_injections", VCPU_STAT(irq_injections) },
196 { "nmi_injections", VCPU_STAT(nmi_injections) },
197 { "req_event", VCPU_STAT(req_event) },
198 { "l1d_flush", VCPU_STAT(l1d_flush) },
199 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
200 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
201 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
202 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
203 { "mmu_flooded", VM_STAT(mmu_flooded) },
204 { "mmu_recycled", VM_STAT(mmu_recycled) },
205 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
206 { "mmu_unsync", VM_STAT(mmu_unsync) },
207 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
208 { "largepages", VM_STAT(lpages) },
209 { "max_mmu_page_hash_collisions",
210 VM_STAT(max_mmu_page_hash_collisions) },
211 { NULL }
212};
213
214u64 __read_mostly host_xcr0;
215
216static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
217
218static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
219{
220 int i;
221 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
222 vcpu->arch.apf.gfns[i] = ~0;
223}
224
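/*
 * User-return notifier: restore the host values of any shared MSRs that
 * were changed on behalf of the guest before returning to userspace.
 */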
225static void kvm_on_user_return(struct user_return_notifier *urn)
226{
227 unsigned slot;
228 struct kvm_shared_msrs *locals
229 = container_of(urn, struct kvm_shared_msrs, urn);
230 struct kvm_shared_msr_values *values;
231 unsigned long flags;
232
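	/*
	 * Unregister with interrupts disabled so this path cannot race with
	 * a concurrent invocation (e.g. via kvm_arch_hardware_disable() on
	 * CPU hotplug) while the notifier is being torn down.
	 */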
237 local_irq_save(flags);
238 if (locals->registered) {
239 locals->registered = false;
240 user_return_notifier_unregister(urn);
241 }
242 local_irq_restore(flags);
243 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
244 values = &locals->values[slot];
245 if (values->host != values->curr) {
246 wrmsrl(shared_msrs_global.msrs[slot], values->host);
247 values->curr = values->host;
248 }
249 }
250}
251
252static void shared_msr_update(unsigned slot, u32 msr)
253{
254 u64 value;
255 unsigned int cpu = smp_processor_id();
256 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
257
258
259
260 if (slot >= shared_msrs_global.nr) {
261 printk(KERN_ERR "kvm: invalid MSR slot!");
262 return;
263 }
264 rdmsrl_safe(msr, &value);
265 smsr->values[slot].host = value;
266 smsr->values[slot].curr = value;
267}
268
269void kvm_define_shared_msr(unsigned slot, u32 msr)
270{
271 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
272 shared_msrs_global.msrs[slot] = msr;
273 if (slot >= shared_msrs_global.nr)
274 shared_msrs_global.nr = slot + 1;
275}
276EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
277
278static void kvm_shared_msr_cpu_online(void)
279{
280 unsigned i;
281
282 for (i = 0; i < shared_msrs_global.nr; ++i)
283 shared_msr_update(i, shared_msrs_global.msrs[i]);
284}
285
286int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
287{
288 unsigned int cpu = smp_processor_id();
289 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
290 int err;
291
292 if (((value ^ smsr->values[slot].curr) & mask) == 0)
293 return 0;
294 smsr->values[slot].curr = value;
295 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
296 if (err)
297 return 1;
298
299 if (!smsr->registered) {
300 smsr->urn.on_user_return = kvm_on_user_return;
301 user_return_notifier_register(&smsr->urn);
302 smsr->registered = true;
303 }
304 return 0;
305}
306EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
307
308static void drop_user_return_notifiers(void)
309{
310 unsigned int cpu = smp_processor_id();
311 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
312
313 if (smsr->registered)
314 kvm_on_user_return(&smsr->urn);
315}
316
317u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
318{
319 return vcpu->arch.apic_base;
320}
321EXPORT_SYMBOL_GPL(kvm_get_apic_base);
322
323enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
324{
325 return kvm_apic_mode(kvm_get_apic_base(vcpu));
326}
327EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
328
329int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
330{
331 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
332 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
333 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
334 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
335
336 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
337 return 1;
338 if (!msr_info->host_initiated) {
339 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
340 return 1;
341 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
342 return 1;
343 }
344
345 kvm_lapic_set_base(vcpu, msr_info->data);
346 return 0;
347}
348EXPORT_SYMBOL_GPL(kvm_set_apic_base);
349
350asmlinkage __visible void kvm_spurious_fault(void)
351{
352
353 BUG();
354}
355EXPORT_SYMBOL_GPL(kvm_spurious_fault);
356
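/*
 * Exception classes used to decide whether two exceptions combine into a
 * double fault (contributory + contributory, or a page fault followed by
 * anything non-benign), per the SDM's exception classification rules.
 */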
357#define EXCPT_BENIGN 0
358#define EXCPT_CONTRIBUTORY 1
359#define EXCPT_PF 2
360
361static int exception_class(int vector)
362{
363 switch (vector) {
364 case PF_VECTOR:
365 return EXCPT_PF;
366 case DE_VECTOR:
367 case TS_VECTOR:
368 case NP_VECTOR:
369 case SS_VECTOR:
370 case GP_VECTOR:
371 return EXCPT_CONTRIBUTORY;
372 default:
373 break;
374 }
375 return EXCPT_BENIGN;
376}
377
378#define EXCPT_FAULT 0
379#define EXCPT_TRAP 1
380#define EXCPT_ABORT 2
381#define EXCPT_INTERRUPT 3
382
383static int exception_type(int vector)
384{
385 unsigned int mask;
386
387 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
388 return EXCPT_INTERRUPT;
389
390 mask = 1 << vector;
391
392
393 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
394 return EXCPT_TRAP;
395
396 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
397 return EXCPT_ABORT;
398
399
400 return EXCPT_FAULT;
401}
402
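/*
 * Queue an exception for injection into the guest.  A second exception
 * raised while one is already queued is merged according to the SDM rules:
 * contributory pairs (or a fault during page-fault delivery) become a
 * double fault, a fault during #DF delivery becomes a triple fault, and
 * benign combinations simply replace the earlier exception.
 */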
403static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
404 unsigned nr, bool has_error, u32 error_code,
405 bool reinject)
406{
407 u32 prev_nr;
408 int class1, class2;
409
410 kvm_make_request(KVM_REQ_EVENT, vcpu);
411
412 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
413 queue:
414 if (has_error && !is_protmode(vcpu))
415 has_error = false;
416 if (reinject) {
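			/*
			 * Reinjection is used for events that were already
			 * delivered to the guest but whose delivery was
			 * interrupted by a VM-exit; a still-pending (not yet
			 * injected) exception is not expected here.
			 */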
425 WARN_ON_ONCE(vcpu->arch.exception.pending);
426 vcpu->arch.exception.injected = true;
427 } else {
428 vcpu->arch.exception.pending = true;
429 vcpu->arch.exception.injected = false;
430 }
431 vcpu->arch.exception.has_error_code = has_error;
432 vcpu->arch.exception.nr = nr;
433 vcpu->arch.exception.error_code = error_code;
434 return;
435 }
436
437
438 prev_nr = vcpu->arch.exception.nr;
439 if (prev_nr == DF_VECTOR) {
440
441 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
442 return;
443 }
444 class1 = exception_class(prev_nr);
445 class2 = exception_class(nr);
446 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
447 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
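		/* Generate a double fault per the SDM's classification table. */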
453 vcpu->arch.exception.pending = true;
454 vcpu->arch.exception.injected = false;
455 vcpu->arch.exception.has_error_code = true;
456 vcpu->arch.exception.nr = DF_VECTOR;
457 vcpu->arch.exception.error_code = 0;
458 } else
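		/*
		 * Replace the previous exception with the new one, in the
		 * hope that instruction re-execution will regenerate the
		 * lost exception.
		 */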
462 goto queue;
463}
464
465void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
466{
467 kvm_multiple_exception(vcpu, nr, false, 0, false);
468}
469EXPORT_SYMBOL_GPL(kvm_queue_exception);
470
471void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
472{
473 kvm_multiple_exception(vcpu, nr, false, 0, true);
474}
475EXPORT_SYMBOL_GPL(kvm_requeue_exception);
476
477int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
478{
479 if (err)
480 kvm_inject_gp(vcpu, 0);
481 else
482 return kvm_skip_emulated_instruction(vcpu);
483
484 return 1;
485}
486EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
487
488void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
489{
490 ++vcpu->stat.pf_guest;
491 vcpu->arch.exception.nested_apf =
492 is_guest_mode(vcpu) && fault->async_page_fault;
493 if (vcpu->arch.exception.nested_apf)
494 vcpu->arch.apf.nested_apf_token = fault->address;
495 else
496 vcpu->arch.cr2 = fault->address;
497 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
498}
499EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
500
501static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
502{
503 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
504 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
505 else
506 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
507
508 return fault->nested_page_fault;
509}
510
511void kvm_inject_nmi(struct kvm_vcpu *vcpu)
512{
513 atomic_inc(&vcpu->arch.nmi_queued);
514 kvm_make_request(KVM_REQ_NMI, vcpu);
515}
516EXPORT_SYMBOL_GPL(kvm_inject_nmi);
517
518void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
519{
520 kvm_multiple_exception(vcpu, nr, true, error_code, false);
521}
522EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
523
524void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
525{
526 kvm_multiple_exception(vcpu, nr, true, error_code, true);
527}
528EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
529
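/*
 * Check that the current privilege level is at most @required_cpl;
 * otherwise inject #GP and return false.
 */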
534bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
535{
536 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
537 return true;
538 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
539 return false;
540}
541EXPORT_SYMBOL_GPL(kvm_require_cpl);
542
543bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
544{
545 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
546 return true;
547
548 kvm_queue_exception(vcpu, UD_VECTOR);
549 return false;
550}
551EXPORT_SYMBOL_GPL(kvm_require_dr);
552
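/*
 * Read data from guest memory.  @ngfn is translated through the given MMU
 * context (mmu->translate_gpa), so this works both for ordinary guest
 * physical addresses and for a nested guest's physical addresses.
 */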
558int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
559 gfn_t ngfn, void *data, int offset, int len,
560 u32 access)
561{
562 struct x86_exception exception;
563 gfn_t real_gfn;
564 gpa_t ngpa;
565
566 ngpa = gfn_to_gpa(ngfn);
567 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
568 if (real_gfn == UNMAPPED_GVA)
569 return -EFAULT;
570
571 real_gfn = gpa_to_gfn(real_gfn);
572
573 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
574}
575EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
576
577static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
578 void *data, int offset, int len, u32 access)
579{
580 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
581 data, offset, len, access);
582}
583
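/*
 * Load the PAE page-directory-pointer table entries referenced by @cr3.
 * Returns 1 if all present PDPTEs are valid, 0 otherwise.
 */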
587int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
588{
589 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
590 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
591 int i;
592 int ret;
593 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
594
595 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
596 offset * sizeof(u64), sizeof(pdpte),
597 PFERR_USER_MASK|PFERR_WRITE_MASK);
598 if (ret < 0) {
599 ret = 0;
600 goto out;
601 }
602 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
603 if ((pdpte[i] & PT_PRESENT_MASK) &&
604 (pdpte[i] &
605 vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
606 ret = 0;
607 goto out;
608 }
609 }
610 ret = 1;
611
612 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
613 __set_bit(VCPU_EXREG_PDPTR,
614 (unsigned long *)&vcpu->arch.regs_avail);
615 __set_bit(VCPU_EXREG_PDPTR,
616 (unsigned long *)&vcpu->arch.regs_dirty);
617out:
618
619 return ret;
620}
621EXPORT_SYMBOL_GPL(load_pdptrs);
622
623bool pdptrs_changed(struct kvm_vcpu *vcpu)
624{
625 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
626 bool changed = true;
627 int offset;
628 gfn_t gfn;
629 int r;
630
631 if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
632 return false;
633
634 if (!test_bit(VCPU_EXREG_PDPTR,
635 (unsigned long *)&vcpu->arch.regs_avail))
636 return true;
637
638 gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
639 offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
640 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
641 PFERR_USER_MASK | PFERR_WRITE_MASK);
642 if (r < 0)
643 goto out;
644 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
645out:
646
647 return changed;
648}
649EXPORT_SYMBOL_GPL(pdptrs_changed);
650
651int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
652{
653 unsigned long old_cr0 = kvm_read_cr0(vcpu);
654 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
655
656 cr0 |= X86_CR0_ET;
657
658#ifdef CONFIG_X86_64
659 if (cr0 & 0xffffffff00000000UL)
660 return 1;
661#endif
662
663 cr0 &= ~CR0_RESERVED_BITS;
664
665 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
666 return 1;
667
668 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
669 return 1;
670
671 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
672#ifdef CONFIG_X86_64
673 if ((vcpu->arch.efer & EFER_LME)) {
674 int cs_db, cs_l;
675
676 if (!is_pae(vcpu))
677 return 1;
678 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
679 if (cs_l)
680 return 1;
681 } else
682#endif
683 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
684 kvm_read_cr3(vcpu)))
685 return 1;
686 }
687
688 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
689 return 1;
690
691 kvm_x86_ops->set_cr0(vcpu, cr0);
692
693 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
694 kvm_clear_async_pf_completion_queue(vcpu);
695 kvm_async_pf_hash_reset(vcpu);
696 }
697
698 if ((cr0 ^ old_cr0) & update_bits)
699 kvm_mmu_reset_context(vcpu);
700
701 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
702 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
703 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
704 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
705
706 return 0;
707}
708EXPORT_SYMBOL_GPL(kvm_set_cr0);
709
710void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
711{
712 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
713}
714EXPORT_SYMBOL_GPL(kvm_lmsw);
715
716static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
717{
718 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
719 !vcpu->guest_xcr0_loaded) {
720
721 if (vcpu->arch.xcr0 != host_xcr0)
722 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
723 vcpu->guest_xcr0_loaded = 1;
724 }
725}
726
727static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
728{
729 if (vcpu->guest_xcr0_loaded) {
730 if (vcpu->arch.xcr0 != host_xcr0)
731 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
732 vcpu->guest_xcr0_loaded = 0;
733 }
734}
735
736static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
737{
738 u64 xcr0 = xcr;
739 u64 old_xcr0 = vcpu->arch.xcr0;
740 u64 valid_bits;
741
742
743 if (index != XCR_XFEATURE_ENABLED_MASK)
744 return 1;
745 if (!(xcr0 & XFEATURE_MASK_FP))
746 return 1;
747 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
748 return 1;
749
750
751
752
753
754
755 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
756 if (xcr0 & ~valid_bits)
757 return 1;
758
759 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
760 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
761 return 1;
762
763 if (xcr0 & XFEATURE_MASK_AVX512) {
764 if (!(xcr0 & XFEATURE_MASK_YMM))
765 return 1;
766 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
767 return 1;
768 }
769 vcpu->arch.xcr0 = xcr0;
770
771 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
772 kvm_update_cpuid(vcpu);
773 return 0;
774}
775
776int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
777{
778 if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
779 __kvm_set_xcr(vcpu, index, xcr)) {
780 kvm_inject_gp(vcpu, 0);
781 return 1;
782 }
783 return 0;
784}
785EXPORT_SYMBOL_GPL(kvm_set_xcr);
786
787int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
788{
789 unsigned long old_cr4 = kvm_read_cr4(vcpu);
790 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
791 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
792
793 if (cr4 & CR4_RESERVED_BITS)
794 return 1;
795
796 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
797 return 1;
798
799 if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
800 return 1;
801
802 if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
803 return 1;
804
805 if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
806 return 1;
807
808 if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
809 return 1;
810
811 if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
812 return 1;
813
814 if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
815 return 1;
816
817 if (is_long_mode(vcpu)) {
818 if (!(cr4 & X86_CR4_PAE))
819 return 1;
820 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
821 && ((cr4 ^ old_cr4) & pdptr_bits)
822 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
823 kvm_read_cr3(vcpu)))
824 return 1;
825
826 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
827 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
828 return 1;
829
830
831 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
832 return 1;
833 }
834
835 if (kvm_x86_ops->set_cr4(vcpu, cr4))
836 return 1;
837
838 if (((cr4 ^ old_cr4) & pdptr_bits) ||
839 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
840 kvm_mmu_reset_context(vcpu);
841
842 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
843 kvm_update_cpuid(vcpu);
844
845 return 0;
846}
847EXPORT_SYMBOL_GPL(kvm_set_cr4);
848
849int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
850{
851 bool skip_tlb_flush = false;
852#ifdef CONFIG_X86_64
853 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
854
855 if (pcid_enabled) {
856 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
857 cr3 &= ~X86_CR3_PCID_NOFLUSH;
858 }
859#endif
860
861 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
862 if (!skip_tlb_flush) {
863 kvm_mmu_sync_roots(vcpu);
864 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
865 }
866 return 0;
867 }
868
869 if (is_long_mode(vcpu) &&
870 (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
871 return 1;
872 else if (is_pae(vcpu) && is_paging(vcpu) &&
873 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
874 return 1;
875
876 kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
877 vcpu->arch.cr3 = cr3;
878 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
879
880 return 0;
881}
882EXPORT_SYMBOL_GPL(kvm_set_cr3);
883
884int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
885{
886 if (cr8 & CR8_RESERVED_BITS)
887 return 1;
888 if (lapic_in_kernel(vcpu))
889 kvm_lapic_set_tpr(vcpu, cr8);
890 else
891 vcpu->arch.cr8 = cr8;
892 return 0;
893}
894EXPORT_SYMBOL_GPL(kvm_set_cr8);
895
896unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
897{
898 if (lapic_in_kernel(vcpu))
899 return kvm_lapic_get_cr8(vcpu);
900 else
901 return vcpu->arch.cr8;
902}
903EXPORT_SYMBOL_GPL(kvm_get_cr8);
904
905static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
906{
907 int i;
908
909 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
910 for (i = 0; i < KVM_NR_DB_REGS; i++)
911 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
912 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
913 }
914}
915
916static void kvm_update_dr6(struct kvm_vcpu *vcpu)
917{
918 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
919 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
920}
921
922static void kvm_update_dr7(struct kvm_vcpu *vcpu)
923{
924 unsigned long dr7;
925
926 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
927 dr7 = vcpu->arch.guest_debug_dr7;
928 else
929 dr7 = vcpu->arch.dr7;
930 kvm_x86_ops->set_dr7(vcpu, dr7);
931 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
932 if (dr7 & DR7_BP_EN_MASK)
933 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
934}
935
936static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
937{
938 u64 fixed = DR6_FIXED_1;
939
940 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
941 fixed |= DR6_RTM;
942 return fixed;
943}
944
945static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
946{
947 switch (dr) {
948 case 0 ... 3:
949 vcpu->arch.db[dr] = val;
950 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
951 vcpu->arch.eff_db[dr] = val;
952 break;
953 case 4:
954
955 case 6:
956 if (val & 0xffffffff00000000ULL)
957 return -1;
958 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
959 kvm_update_dr6(vcpu);
960 break;
961 case 5:
962
963 default:
964 if (val & 0xffffffff00000000ULL)
965 return -1;
966 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
967 kvm_update_dr7(vcpu);
968 break;
969 }
970
971 return 0;
972}
973
974int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
975{
976 if (__kvm_set_dr(vcpu, dr, val)) {
977 kvm_inject_gp(vcpu, 0);
978 return 1;
979 }
980 return 0;
981}
982EXPORT_SYMBOL_GPL(kvm_set_dr);
983
984int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
985{
986 switch (dr) {
987 case 0 ... 3:
988 *val = vcpu->arch.db[dr];
989 break;
990 case 4:
991
992 case 6:
993 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
994 *val = vcpu->arch.dr6;
995 else
996 *val = kvm_x86_ops->get_dr6(vcpu);
997 break;
998 case 5:
999
1000 default:
1001 *val = vcpu->arch.dr7;
1002 break;
1003 }
1004 return 0;
1005}
1006EXPORT_SYMBOL_GPL(kvm_get_dr);
1007
1008bool kvm_rdpmc(struct kvm_vcpu *vcpu)
1009{
1010 u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
1011 u64 data;
1012 int err;
1013
1014 err = kvm_pmu_rdpmc(vcpu, ecx, &data);
1015 if (err)
1016 return err;
1017 kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
1018 kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
1019 return err;
1020}
1021EXPORT_SYMBOL_GPL(kvm_rdpmc);
1022
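/*
 * MSRs that userspace needs to save/restore (e.g. for live migration),
 * exported via KVM_GET_MSR_INDEX_LIST.  The list is trimmed at module load
 * time to the MSRs actually supported by the host; KVM-specific and
 * emulated MSRs live in emulated_msrs[] instead.
 */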
1033static u32 msrs_to_save[] = {
1034 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1035 MSR_STAR,
1036#ifdef CONFIG_X86_64
1037 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1038#endif
1039 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1040 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1041 MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
1042};
1043
1044static unsigned num_msrs_to_save;
1045
1046static u32 emulated_msrs[] = {
1047 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1048 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1049 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1050 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1051 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1052 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1053 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1054 HV_X64_MSR_RESET,
1055 HV_X64_MSR_VP_INDEX,
1056 HV_X64_MSR_VP_RUNTIME,
1057 HV_X64_MSR_SCONTROL,
1058 HV_X64_MSR_STIMER0_CONFIG,
1059 HV_X64_MSR_VP_ASSIST_PAGE,
1060 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1061 HV_X64_MSR_TSC_EMULATION_STATUS,
1062
1063 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1064 MSR_KVM_PV_EOI_EN,
1065
1066 MSR_IA32_TSC_ADJUST,
1067 MSR_IA32_TSCDEADLINE,
1068 MSR_IA32_MISC_ENABLE,
1069 MSR_IA32_MCG_STATUS,
1070 MSR_IA32_MCG_CTL,
1071 MSR_IA32_MCG_EXT_CTL,
1072 MSR_IA32_SMBASE,
1073 MSR_SMI_COUNT,
1074 MSR_PLATFORM_INFO,
1075 MSR_MISC_FEATURES_ENABLES,
1076 MSR_AMD64_VIRT_SPEC_CTRL,
1077};
1078
1079static unsigned num_emulated_msrs;
1080
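/*
 * Feature MSRs: their values describe host/KVM capabilities rather than
 * guest state, and are exposed through KVM_GET_MSR_FEATURE_INDEX_LIST and
 * system-scoped KVM_GET_MSRS.
 */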
1085static u32 msr_based_features[] = {
1086 MSR_IA32_VMX_BASIC,
1087 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1088 MSR_IA32_VMX_PINBASED_CTLS,
1089 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1090 MSR_IA32_VMX_PROCBASED_CTLS,
1091 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1092 MSR_IA32_VMX_EXIT_CTLS,
1093 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1094 MSR_IA32_VMX_ENTRY_CTLS,
1095 MSR_IA32_VMX_MISC,
1096 MSR_IA32_VMX_CR0_FIXED0,
1097 MSR_IA32_VMX_CR0_FIXED1,
1098 MSR_IA32_VMX_CR4_FIXED0,
1099 MSR_IA32_VMX_CR4_FIXED1,
1100 MSR_IA32_VMX_VMCS_ENUM,
1101 MSR_IA32_VMX_PROCBASED_CTLS2,
1102 MSR_IA32_VMX_EPT_VPID_CAP,
1103 MSR_IA32_VMX_VMFUNC,
1104
1105 MSR_F10H_DECFG,
1106 MSR_IA32_UCODE_REV,
1107 MSR_IA32_ARCH_CAPABILITIES,
1108};
1109
1110static unsigned int num_msr_based_features;
1111
1112u64 kvm_get_arch_capabilities(void)
1113{
1114 u64 data;
1115
1116 rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
1117
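	/*
	 * If the host flushes the L1D cache on VM-entry (any mitigation mode
	 * other than "never"), a nested hypervisor does not need to flush
	 * again, so advertise ARCH_CAP_SKIP_VMENTRY_L1DFLUSH to the guest.
	 */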
1127 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1128 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1129
1130 return data;
1131}
1132EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
1133
1134static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1135{
1136 switch (msr->index) {
1137 case MSR_IA32_ARCH_CAPABILITIES:
1138 msr->data = kvm_get_arch_capabilities();
1139 break;
1140 case MSR_IA32_UCODE_REV:
1141 rdmsrl_safe(msr->index, &msr->data);
1142 break;
1143 default:
1144 if (kvm_x86_ops->get_msr_feature(msr))
1145 return 1;
1146 }
1147 return 0;
1148}
1149
1150static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1151{
1152 struct kvm_msr_entry msr;
1153 int r;
1154
1155 msr.index = index;
1156 r = kvm_get_msr_feature(&msr);
1157 if (r)
1158 return r;
1159
1160 *data = msr.data;
1161
1162 return 0;
1163}
1164
1165bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1166{
1167 if (efer & efer_reserved_bits)
1168 return false;
1169
1170 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1171 return false;
1172
1173 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1174 return false;
1175
1176 return true;
1177}
1178EXPORT_SYMBOL_GPL(kvm_valid_efer);
1179
1180static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
1181{
1182 u64 old_efer = vcpu->arch.efer;
1183
1184 if (!kvm_valid_efer(vcpu, efer))
1185 return 1;
1186
1187 if (is_paging(vcpu)
1188 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1189 return 1;
1190
1191 efer &= ~EFER_LMA;
1192 efer |= vcpu->arch.efer & EFER_LMA;
1193
1194 kvm_x86_ops->set_efer(vcpu, efer);
1195
1196
1197 if ((efer ^ old_efer) & EFER_NX)
1198 kvm_mmu_reset_context(vcpu);
1199
1200 return 0;
1201}
1202
1203void kvm_enable_efer_bits(u64 mask)
1204{
1205 efer_reserved_bits &= ~mask;
1206}
1207EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1208
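/*
 * Write @msr into the appropriate "register" for the vCPU: perform the
 * common canonicality checks, then hand off to the vendor backend.
 * Returns 0 on success, non-zero otherwise.
 */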
1214int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
1215{
1216 switch (msr->index) {
1217 case MSR_FS_BASE:
1218 case MSR_GS_BASE:
1219 case MSR_KERNEL_GS_BASE:
1220 case MSR_CSTAR:
1221 case MSR_LSTAR:
1222 if (is_noncanonical_address(msr->data, vcpu))
1223 return 1;
1224 break;
1225 case MSR_IA32_SYSENTER_EIP:
1226 case MSR_IA32_SYSENTER_ESP:
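		/*
		 * SYSENTER_EIP/ESP are forced to canonical form: AMD CPUs
		 * ignore the upper bits while Intel CPUs reject non-canonical
		 * values, so canonicalizing the write keeps behaviour
		 * deterministic, e.g. across cross-vendor migration.
		 */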
1239 msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
1240 }
1241 return kvm_x86_ops->set_msr(vcpu, msr);
1242}
1243EXPORT_SYMBOL_GPL(kvm_set_msr);
1244
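/*
 * Adapt kvm_get_msr()/kvm_set_msr() to the msr_io() calling convention.
 * These accesses are marked host-initiated, so guest-visible restrictions
 * do not apply.
 */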
1248static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1249{
1250 struct msr_data msr;
1251 int r;
1252
1253 msr.index = index;
1254 msr.host_initiated = true;
1255 r = kvm_get_msr(vcpu, &msr);
1256 if (r)
1257 return r;
1258
1259 *data = msr.data;
1260 return 0;
1261}
1262
1263static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1264{
1265 struct msr_data msr;
1266
1267 msr.data = *data;
1268 msr.index = index;
1269 msr.host_initiated = true;
1270 return kvm_set_msr(vcpu, &msr);
1271}
1272
1273#ifdef CONFIG_X86_64
1274struct pvclock_gtod_data {
1275 seqcount_t seq;
1276
1277 struct {
1278 int vclock_mode;
1279 u64 cycle_last;
1280 u64 mask;
1281 u32 mult;
1282 u32 shift;
1283 } clock;
1284
1285 u64 boot_ns;
1286 u64 nsec_base;
1287 u64 wall_time_sec;
1288};
1289
1290static struct pvclock_gtod_data pvclock_gtod_data;
1291
1292static void update_pvclock_gtod(struct timekeeper *tk)
1293{
1294 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1295 u64 boot_ns;
1296
1297 boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
1298
1299 write_seqcount_begin(&vdata->seq);
1300
1301
1302 vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
1303 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1304 vdata->clock.mask = tk->tkr_mono.mask;
1305 vdata->clock.mult = tk->tkr_mono.mult;
1306 vdata->clock.shift = tk->tkr_mono.shift;
1307
1308 vdata->boot_ns = boot_ns;
1309 vdata->nsec_base = tk->tkr_mono.xtime_nsec;
1310
1311 vdata->wall_time_sec = tk->xtime_sec;
1312
1313 write_seqcount_end(&vdata->seq);
1314}
1315#endif
1316
1317void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
1318{
1319
1320
1321
1322
1323
1324 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1325}
1326
1327static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1328{
1329 int version;
1330 int r;
1331 struct pvclock_wall_clock wc;
1332 struct timespec64 boot;
1333
1334 if (!wall_clock)
1335 return;
1336
1337 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
1338 if (r)
1339 return;
1340
1341 if (version & 1)
1342 ++version;
1343
1344 ++version;
1345
1346 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
1347 return;
1348
1349
1350
1351
1352
1353
1354
1355 getboottime64(&boot);
1356
1357 if (kvm->arch.kvmclock_offset) {
1358 struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
1359 boot = timespec64_sub(boot, ts);
1360 }
1361 wc.sec = (u32)boot.tv_sec;
1362 wc.nsec = boot.tv_nsec;
1363 wc.version = version;
1364
1365 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1366
1367 version++;
1368 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1369}
1370
1371static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1372{
1373 do_shl32_div32(dividend, divisor);
1374 return dividend;
1375}
1376
1377static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1378 s8 *pshift, u32 *pmultiplier)
1379{
1380 uint64_t scaled64;
1381 int32_t shift = 0;
1382 uint64_t tps64;
1383 uint32_t tps32;
1384
1385 tps64 = base_hz;
1386 scaled64 = scaled_hz;
1387 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1388 tps64 >>= 1;
1389 shift--;
1390 }
1391
1392 tps32 = (uint32_t)tps64;
1393 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
1394 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
1395 scaled64 >>= 1;
1396 else
1397 tps32 <<= 1;
1398 shift++;
1399 }
1400
1401 *pshift = shift;
1402 *pmultiplier = div_frac(scaled64, tps32);
1403
1404 pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
1405 __func__, base_hz, scaled_hz, shift, *pmultiplier);
1406}
1407
1408#ifdef CONFIG_X86_64
1409static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1410#endif
1411
1412static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1413static unsigned long max_tsc_khz;
1414
1415static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1416{
1417 u64 v = (u64)khz * (1000000 + ppm);
1418 do_div(v, 1000000);
1419 return v;
1420}
1421
1422static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1423{
1424 u64 ratio;
1425
1426
1427 if (!scale) {
1428 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1429 return 0;
1430 }
1431
1432
1433 if (!kvm_has_tsc_control) {
1434 if (user_tsc_khz > tsc_khz) {
1435 vcpu->arch.tsc_catchup = 1;
1436 vcpu->arch.tsc_always_catchup = 1;
1437 return 0;
1438 } else {
1439 WARN(1, "user requested TSC rate below hardware speed\n");
1440 return -1;
1441 }
1442 }
1443
1444
1445 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
1446 user_tsc_khz, tsc_khz);
1447
1448 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
1449 WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
1450 user_tsc_khz);
1451 return -1;
1452 }
1453
1454 vcpu->arch.tsc_scaling_ratio = ratio;
1455 return 0;
1456}
1457
1458static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1459{
1460 u32 thresh_lo, thresh_hi;
1461 int use_scaling = 0;
1462
1463
1464 if (user_tsc_khz == 0) {
1465
1466 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1467 return -1;
1468 }
1469
1470
1471 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
1472 &vcpu->arch.virtual_tsc_shift,
1473 &vcpu->arch.virtual_tsc_mult);
1474 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
1475
1476
1477
1478
1479
1480
1481
1482 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1483 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1484 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
1485 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
1486 use_scaling = 1;
1487 }
1488 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
1489}
1490
1491static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1492{
1493 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1494 vcpu->arch.virtual_tsc_mult,
1495 vcpu->arch.virtual_tsc_shift);
1496 tsc += vcpu->arch.this_tsc_write;
1497 return tsc;
1498}
1499
1500static inline int gtod_is_based_on_tsc(int mode)
1501{
1502 return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
1503}
1504
1505static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1506{
1507#ifdef CONFIG_X86_64
1508 bool vcpus_matched;
1509 struct kvm_arch *ka = &vcpu->kvm->arch;
1510 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1511
1512 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1513 atomic_read(&vcpu->kvm->online_vcpus));
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523 if (ka->use_master_clock ||
1524 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
1525 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1526
1527 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1528 atomic_read(&vcpu->kvm->online_vcpus),
1529 ka->use_master_clock, gtod->clock.vclock_mode);
1530#endif
1531}
1532
1533static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1534{
1535 u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
1536 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1537}
1538
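/*
 * Multiply a TSC value by the vCPU's TSC scaling ratio, a fixed-point
 * number with kvm_tsc_scaling_ratio_frac_bits fractional bits.
 */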
1549static inline u64 __scale_tsc(u64 ratio, u64 tsc)
1550{
1551 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
1552}
1553
1554u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
1555{
1556 u64 _tsc = tsc;
1557 u64 ratio = vcpu->arch.tsc_scaling_ratio;
1558
1559 if (ratio != kvm_default_tsc_scaling_ratio)
1560 _tsc = __scale_tsc(ratio, tsc);
1561
1562 return _tsc;
1563}
1564EXPORT_SYMBOL_GPL(kvm_scale_tsc);
1565
1566static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1567{
1568 u64 tsc;
1569
1570 tsc = kvm_scale_tsc(vcpu, rdtsc());
1571
1572 return target_tsc - tsc;
1573}
1574
1575u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
1576{
1577 u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
1578
1579 return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
1580}
1581EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
1582
1583static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1584{
1585 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1586 vcpu->arch.tsc_offset = offset;
1587}
1588
1589static inline bool kvm_check_tsc_unstable(void)
1590{
1591#ifdef CONFIG_X86_64
1592
1593
1594
1595
1596 if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
1597 return false;
1598#endif
1599 return check_tsc_unstable();
1600}
1601
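/*
 * Handle guest- and host-initiated writes of the TSC.  Writes that land
 * close to the value an already-synchronized vCPU would read are treated
 * as an attempt to synchronize with the current TSC generation; otherwise
 * a new generation is started.
 */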
1602void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1603{
1604 struct kvm *kvm = vcpu->kvm;
1605 u64 offset, ns, elapsed;
1606 unsigned long flags;
1607 bool matched;
1608 bool already_matched;
1609 u64 data = msr->data;
1610 bool synchronizing = false;
1611
1612 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1613 offset = kvm_compute_tsc_offset(vcpu, data);
1614 ns = ktime_get_boot_ns();
1615 elapsed = ns - kvm->arch.last_tsc_nsec;
1616
1617 if (vcpu->arch.virtual_tsc_khz) {
1618 if (data == 0 && msr->host_initiated) {
1619
1620
1621
1622
1623
1624 synchronizing = true;
1625 } else {
1626 u64 tsc_exp = kvm->arch.last_tsc_write +
1627 nsec_to_cycles(vcpu, elapsed);
1628 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
1629
1630
1631
1632
1633
1634 synchronizing = data < tsc_exp + tsc_hz &&
1635 data + tsc_hz > tsc_exp;
1636 }
1637 }
1638
1639
1640
1641
1642
1643
1644
1645 if (synchronizing &&
1646 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1647 if (!kvm_check_tsc_unstable()) {
1648 offset = kvm->arch.cur_tsc_offset;
1649 pr_debug("kvm: matched tsc offset for %llu\n", data);
1650 } else {
1651 u64 delta = nsec_to_cycles(vcpu, elapsed);
1652 data += delta;
1653 offset = kvm_compute_tsc_offset(vcpu, data);
1654 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1655 }
1656 matched = true;
1657 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
1658 } else {
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668 kvm->arch.cur_tsc_generation++;
1669 kvm->arch.cur_tsc_nsec = ns;
1670 kvm->arch.cur_tsc_write = data;
1671 kvm->arch.cur_tsc_offset = offset;
1672 matched = false;
1673 pr_debug("kvm: new tsc generation %llu, clock %llu\n",
1674 kvm->arch.cur_tsc_generation, data);
1675 }
1676
1677
1678
1679
1680
1681 kvm->arch.last_tsc_nsec = ns;
1682 kvm->arch.last_tsc_write = data;
1683 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1684
1685 vcpu->arch.last_guest_tsc = data;
1686
1687
1688 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1689 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1690 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1691
1692 if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
1693 update_ia32_tsc_adjust_msr(vcpu, offset);
1694
1695 kvm_vcpu_write_tsc_offset(vcpu, offset);
1696 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1697
1698 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1699 if (!matched) {
1700 kvm->arch.nr_vcpus_matched_tsc = 0;
1701 } else if (!already_matched) {
1702 kvm->arch.nr_vcpus_matched_tsc++;
1703 }
1704
1705 kvm_track_tsc_matching(vcpu);
1706 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
1707}
1708
1709EXPORT_SYMBOL_GPL(kvm_write_tsc);
1710
1711static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
1712 s64 adjustment)
1713{
1714 kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
1715}
1716
1717static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
1718{
1719 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
1720 WARN_ON(adjustment < 0);
1721 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
1722 adjust_tsc_offset_guest(vcpu, adjustment);
1723}
1724
1725#ifdef CONFIG_X86_64
1726
1727static u64 read_tsc(void)
1728{
1729 u64 ret = (u64)rdtsc_ordered();
1730 u64 last = pvclock_gtod_data.clock.cycle_last;
1731
1732 if (likely(ret >= last))
1733 return ret;
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743 asm volatile ("");
1744 return last;
1745}
1746
1747static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
1748{
1749 long v;
1750 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1751 u64 tsc_pg_val;
1752
1753 switch (gtod->clock.vclock_mode) {
1754 case VCLOCK_HVCLOCK:
1755 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
1756 tsc_timestamp);
1757 if (tsc_pg_val != U64_MAX) {
1758
1759 *mode = VCLOCK_HVCLOCK;
1760 v = (tsc_pg_val - gtod->clock.cycle_last) &
1761 gtod->clock.mask;
1762 } else {
1763
1764 *mode = VCLOCK_NONE;
1765 }
1766 break;
1767 case VCLOCK_TSC:
1768 *mode = VCLOCK_TSC;
1769 *tsc_timestamp = read_tsc();
1770 v = (*tsc_timestamp - gtod->clock.cycle_last) &
1771 gtod->clock.mask;
1772 break;
1773 default:
1774 *mode = VCLOCK_NONE;
1775 }
1776
1777 if (*mode == VCLOCK_NONE)
1778 *tsc_timestamp = v = 0;
1779
1780 return v * gtod->clock.mult;
1781}
1782
1783static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
1784{
1785 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1786 unsigned long seq;
1787 int mode;
1788 u64 ns;
1789
	do {
		seq = read_seqcount_begin(&gtod->seq);
		ns = gtod->nsec_base;
		ns += vgettsc(tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
		ns += gtod->boot_ns;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
1797 *t = ns;
1798
1799 return mode;
1800}
1801
1802static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
1803{
1804 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1805 unsigned long seq;
1806 int mode;
1807 u64 ns;
1808
	do {
		seq = read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->nsec_base;
		ns += vgettsc(tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
1816
1817 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
1818 ts->tv_nsec = ns;
1819
1820 return mode;
1821}
1822
1823
1824static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
1825{
1826
1827 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
1828 return false;
1829
1830 return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
1831 tsc_timestamp));
1832}
1833
1834
1835static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
1836 u64 *tsc_timestamp)
1837{
1838
1839 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
1840 return false;
1841
1842 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
1843}
1844#endif
1845
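/*
 * Decide whether this VM can use a "master clock": a single
 * (master_cycle_now, master_kernel_ns) snapshot from which every vCPU's
 * kvmclock is derived.  This requires a TSC-based host clocksource and
 * that all vCPUs' TSCs are synchronized (matched); it is disabled if
 * backwards TSC motion was ever observed or if the boot vCPU uses the
 * old kvmclock MSRs.
 */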
1887static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
1888{
1889#ifdef CONFIG_X86_64
1890 struct kvm_arch *ka = &kvm->arch;
1891 int vclock_mode;
1892 bool host_tsc_clocksource, vcpus_matched;
1893
1894 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1895 atomic_read(&kvm->online_vcpus));
1896
1897
1898
1899
1900
1901 host_tsc_clocksource = kvm_get_time_and_clockread(
1902 &ka->master_kernel_ns,
1903 &ka->master_cycle_now);
1904
1905 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
1906 && !ka->backwards_tsc_observed
1907 && !ka->boot_vcpu_runs_old_kvmclock;
1908
1909 if (ka->use_master_clock)
1910 atomic_set(&kvm_guest_has_master_clock, 1);
1911
1912 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
1913 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
1914 vcpus_matched);
1915#endif
1916}
1917
1918void kvm_make_mclock_inprogress_request(struct kvm *kvm)
1919{
1920 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
1921}
1922
1923static void kvm_gen_update_masterclock(struct kvm *kvm)
1924{
1925#ifdef CONFIG_X86_64
1926 int i;
1927 struct kvm_vcpu *vcpu;
1928 struct kvm_arch *ka = &kvm->arch;
1929
1930 spin_lock(&ka->pvclock_gtod_sync_lock);
1931 kvm_make_mclock_inprogress_request(kvm);
1932
1933 pvclock_update_vm_gtod_copy(kvm);
1934
1935 kvm_for_each_vcpu(i, vcpu, kvm)
1936 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1937
1938
1939 kvm_for_each_vcpu(i, vcpu, kvm)
1940 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
1941
1942 spin_unlock(&ka->pvclock_gtod_sync_lock);
1943#endif
1944}
1945
1946u64 get_kvmclock_ns(struct kvm *kvm)
1947{
1948 struct kvm_arch *ka = &kvm->arch;
1949 struct pvclock_vcpu_time_info hv_clock;
1950 u64 ret;
1951
1952 spin_lock(&ka->pvclock_gtod_sync_lock);
1953 if (!ka->use_master_clock) {
1954 spin_unlock(&ka->pvclock_gtod_sync_lock);
1955 return ktime_get_boot_ns() + ka->kvmclock_offset;
1956 }
1957
1958 hv_clock.tsc_timestamp = ka->master_cycle_now;
1959 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
1960 spin_unlock(&ka->pvclock_gtod_sync_lock);
1961
1962
1963 get_cpu();
1964
1965 if (__this_cpu_read(cpu_tsc_khz)) {
1966 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
1967 &hv_clock.tsc_shift,
1968 &hv_clock.tsc_to_system_mul);
1969 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
1970 } else
1971 ret = ktime_get_boot_ns() + ka->kvmclock_offset;
1972
1973 put_cpu();
1974
1975 return ret;
1976}
1977
1978static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1979{
1980 struct kvm_vcpu_arch *vcpu = &v->arch;
1981 struct pvclock_vcpu_time_info guest_hv_clock;
1982
1983 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1984 &guest_hv_clock, sizeof(guest_hv_clock))))
1985 return;
1986
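	/*
	 * The guest may read another vCPU's kvmclock page at any time, so
	 * follow the pvclock protocol: bump the version to an odd value
	 * before updating the structure, write the new contents, then bump
	 * it again to an even value, with write barriers in between so a
	 * torn read can be detected by re-checking the version.
	 */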
2001 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2002
2003 if (guest_hv_clock.version & 1)
2004 ++guest_hv_clock.version;
2005
2006 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2007 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2008 &vcpu->hv_clock,
2009 sizeof(vcpu->hv_clock.version));
2010
2011 smp_wmb();
2012
2013
2014 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2015
2016 if (vcpu->pvclock_set_guest_stopped_request) {
2017 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2018 vcpu->pvclock_set_guest_stopped_request = false;
2019 }
2020
2021 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2022
2023 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2024 &vcpu->hv_clock,
2025 sizeof(vcpu->hv_clock));
2026
2027 smp_wmb();
2028
2029 vcpu->hv_clock.version++;
2030 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2031 &vcpu->hv_clock,
2032 sizeof(vcpu->hv_clock.version));
2033}
2034
2035static int kvm_guest_time_update(struct kvm_vcpu *v)
2036{
2037 unsigned long flags, tgt_tsc_khz;
2038 struct kvm_vcpu_arch *vcpu = &v->arch;
2039 struct kvm_arch *ka = &v->kvm->arch;
2040 s64 kernel_ns;
2041 u64 tsc_timestamp, host_tsc;
2042 u8 pvclock_flags;
2043 bool use_master_clock;
2044
2045 kernel_ns = 0;
2046 host_tsc = 0;
2047
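	/*
	 * If the master clock is in use, take its (TSC, ns) snapshot under
	 * the pvclock lock so every vCPU derives time from the same pair.
	 */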
2052 spin_lock(&ka->pvclock_gtod_sync_lock);
2053 use_master_clock = ka->use_master_clock;
2054 if (use_master_clock) {
2055 host_tsc = ka->master_cycle_now;
2056 kernel_ns = ka->master_kernel_ns;
2057 }
2058 spin_unlock(&ka->pvclock_gtod_sync_lock);
2059
2060
2061 local_irq_save(flags);
2062 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2063 if (unlikely(tgt_tsc_khz == 0)) {
2064 local_irq_restore(flags);
2065 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2066 return 1;
2067 }
2068 if (!use_master_clock) {
2069 host_tsc = rdtsc();
2070 kernel_ns = ktime_get_boot_ns();
2071 }
2072
2073 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2074
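	/*
	 * In catch-up mode (guest TSC rate higher than the host can provide
	 * in hardware), nudge the TSC offset forward whenever the computed
	 * guest TSC for "now" is ahead of what the guest would actually
	 * read, so the guest TSC keeps pace with elapsed time.
	 */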
2085 if (vcpu->tsc_catchup) {
2086 u64 tsc = compute_guest_tsc(v, kernel_ns);
2087 if (tsc > tsc_timestamp) {
2088 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2089 tsc_timestamp = tsc;
2090 }
2091 }
2092
2093 local_irq_restore(flags);
2094
2095
2096
2097 if (kvm_has_tsc_control)
2098 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
2099
2100 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2101 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2102 &vcpu->hv_clock.tsc_shift,
2103 &vcpu->hv_clock.tsc_to_system_mul);
2104 vcpu->hw_tsc_khz = tgt_tsc_khz;
2105 }
2106
2107 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2108 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2109 vcpu->last_guest_tsc = tsc_timestamp;
2110
2111
2112 pvclock_flags = 0;
2113 if (use_master_clock)
2114 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2115
2116 vcpu->hv_clock.flags = pvclock_flags;
2117
2118 if (vcpu->pv_time_enabled)
2119 kvm_setup_pvclock_page(v);
2120 if (v == kvm_get_vcpu(v->kvm, 0))
2121 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2122 return 0;
2123}
2124
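/*
 * kvmclock updates that are isolated to a given vCPU (such as migration
 * to another physical CPU) would otherwise leave the other vCPUs'
 * system_time static, so a clock update is requested for all vCPUs,
 * delayed slightly to let the triggering vCPU settle first.
 */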
2139#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2140
2141static void kvmclock_update_fn(struct work_struct *work)
2142{
2143 int i;
2144 struct delayed_work *dwork = to_delayed_work(work);
2145 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2146 kvmclock_update_work);
2147 struct kvm *kvm = container_of(ka, struct kvm, arch);
2148 struct kvm_vcpu *vcpu;
2149
2150 kvm_for_each_vcpu(i, vcpu, kvm) {
2151 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2152 kvm_vcpu_kick(vcpu);
2153 }
2154}
2155
2156static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
2157{
2158 struct kvm *kvm = v->kvm;
2159
2160 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2161 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
2162 KVMCLOCK_UPDATE_DELAY);
2163}
2164
2165#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
2166
2167static void kvmclock_sync_fn(struct work_struct *work)
2168{
2169 struct delayed_work *dwork = to_delayed_work(work);
2170 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2171 kvmclock_sync_work);
2172 struct kvm *kvm = container_of(ka, struct kvm, arch);
2173
2174 if (!kvmclock_periodic_sync)
2175 return;
2176
2177 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2178 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2179 KVMCLOCK_SYNC_PERIOD);
2180}
2181
2182static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2183{
2184 u64 mcg_cap = vcpu->arch.mcg_cap;
2185 unsigned bank_num = mcg_cap & 0xff;
2186 u32 msr = msr_info->index;
2187 u64 data = msr_info->data;
2188
2189 switch (msr) {
2190 case MSR_IA32_MCG_STATUS:
2191 vcpu->arch.mcg_status = data;
2192 break;
2193 case MSR_IA32_MCG_CTL:
2194 if (!(mcg_cap & MCG_CTL_P) &&
2195 (data || !msr_info->host_initiated))
2196 return 1;
2197 if (data != 0 && data != ~(u64)0)
2198 return 1;
2199 vcpu->arch.mcg_ctl = data;
2200 break;
2201 default:
2202 if (msr >= MSR_IA32_MC0_CTL &&
2203 msr < MSR_IA32_MCx_CTL(bank_num)) {
2204 u32 offset = msr - MSR_IA32_MC0_CTL;
2205
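			/*
			 * Only all-0s or all-1s may be written to MCi_CTL
			 * (bit 10 is tolerated as clear, for guests working
			 * around old AMD/BIOS issues), and the guest may only
			 * clear MCi_STATUS, never set it.
			 */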
2210 if ((offset & 0x3) == 0 &&
2211 data != 0 && (data | (1 << 10)) != ~(u64)0)
2212 return -1;
2213 if (!msr_info->host_initiated &&
2214 (offset & 0x3) == 1 && data != 0)
2215 return -1;
2216 vcpu->arch.mce_banks[offset] = data;
2217 break;
2218 }
2219 return 1;
2220 }
2221 return 0;
2222}
2223
2224static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2225{
2226 struct kvm *kvm = vcpu->kvm;
2227 int lm = is_long_mode(vcpu);
2228 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2229 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2230 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2231 : kvm->arch.xen_hvm_config.blob_size_32;
2232 u32 page_num = data & ~PAGE_MASK;
2233 u64 page_addr = data & PAGE_MASK;
2234 u8 *page;
2235 int r;
2236
2237 r = -E2BIG;
2238 if (page_num >= blob_size)
2239 goto out;
2240 r = -ENOMEM;
2241 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2242 if (IS_ERR(page)) {
2243 r = PTR_ERR(page);
2244 goto out;
2245 }
2246 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
2247 goto out_free;
2248 r = 0;
2249out_free:
2250 kfree(page);
2251out:
2252 return r;
2253}
2254
2255static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2256{
2257 gpa_t gpa = data & ~0x3f;
2258
2259
2260 if (data & 0x38)
2261 return 1;
2262
2263 vcpu->arch.apf.msr_val = data;
2264
2265 if (!(data & KVM_ASYNC_PF_ENABLED)) {
2266 kvm_clear_async_pf_completion_queue(vcpu);
2267 kvm_async_pf_hash_reset(vcpu);
2268 return 0;
2269 }
2270
2271 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2272 sizeof(u32)))
2273 return 1;
2274
2275 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2276 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2277 kvm_async_pf_wakeup_all(vcpu);
2278 return 0;
2279}
2280
2281static void kvmclock_reset(struct kvm_vcpu *vcpu)
2282{
2283 vcpu->arch.pv_time_enabled = false;
2284}
2285
2286static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
2287{
2288 ++vcpu->stat.tlb_flush;
2289 kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
2290}
2291
2292static void record_steal_time(struct kvm_vcpu *vcpu)
2293{
2294 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2295 return;
2296
2297 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2298 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2299 return;
2300
2301
2302
2303
2304
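	/*
	 * If a TLB flush was requested (KVM_VCPU_FLUSH_TLB in the preempted
	 * field) while this vCPU was scheduled out, do it now on the guest's
	 * behalf so the guest does not have to.
	 */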
2305 if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
2306 kvm_vcpu_flush_tlb(vcpu, false);
2307
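	/*
	 * The version field works like a seqcount: the guest treats an odd
	 * value as "update in progress".  Force a stale odd value back to
	 * even first, then bump it to odd for the duration of the update
	 * and back to even once the new contents have been written.
	 */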
2308 if (vcpu->arch.st.steal.version & 1)
2309 vcpu->arch.st.steal.version += 1;
2310
2311 vcpu->arch.st.steal.version += 1;
2312
2313 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2314 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2315
2316 smp_wmb();
2317
2318 vcpu->arch.st.steal.steal += current->sched_info.run_delay -
2319 vcpu->arch.st.last_steal;
2320 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2321
2322 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2323 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2324
2325 smp_wmb();
2326
2327 vcpu->arch.st.steal.version += 1;
2328
2329 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2330 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2331}
2332
2333int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2334{
2335 bool pr = false;
2336 u32 msr = msr_info->index;
2337 u64 data = msr_info->data;
2338
2339 switch (msr) {
2340 case MSR_AMD64_NB_CFG:
2341 case MSR_IA32_UCODE_WRITE:
2342 case MSR_VM_HSAVE_PA:
2343 case MSR_AMD64_PATCH_LOADER:
2344 case MSR_AMD64_BU_CFG2:
2345 case MSR_AMD64_DC_CFG:
2346 break;
2347
2348 case MSR_IA32_UCODE_REV:
2349 if (msr_info->host_initiated)
2350 vcpu->arch.microcode_version = data;
2351 break;
2352 case MSR_EFER:
2353 return set_efer(vcpu, data);
2354 case MSR_K7_HWCR:
2355 data &= ~(u64)0x40;
2356 data &= ~(u64)0x100;
2357 data &= ~(u64)0x8;
2358 data &= ~(u64)0x40000;
2359 if (data != 0) {
2360 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2361 data);
2362 return 1;
2363 }
2364 break;
2365 case MSR_FAM10H_MMIO_CONF_BASE:
2366 if (data != 0) {
2367 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2368 "0x%llx\n", data);
2369 return 1;
2370 }
2371 break;
2372 case MSR_IA32_DEBUGCTLMSR:
2373 if (!data) {
2374
2375 break;
2376 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2377
2378
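			/*
			 * Values other than LBR and BTF are vendor-specific,
			 * hence reserved; writing them yields #GP in the
			 * guest.
			 */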
2379 return 1;
2380 }
2381 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2382 __func__, data);
2383 break;
2384 case 0x200 ... 0x2ff:
2385 return kvm_mtrr_set_msr(vcpu, msr, data);
2386 case MSR_IA32_APICBASE:
2387 return kvm_set_apic_base(vcpu, msr_info);
2388 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2389 return kvm_x2apic_msr_write(vcpu, msr, data);
2390 case MSR_IA32_TSCDEADLINE:
2391 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2392 break;
2393 case MSR_IA32_TSC_ADJUST:
2394 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
2395 if (!msr_info->host_initiated) {
2396 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2397 adjust_tsc_offset_guest(vcpu, adj);
2398 }
2399 vcpu->arch.ia32_tsc_adjust_msr = data;
2400 }
2401 break;
2402 case MSR_IA32_MISC_ENABLE:
2403 vcpu->arch.ia32_misc_enable_msr = data;
2404 break;
2405 case MSR_IA32_SMBASE:
2406 if (!msr_info->host_initiated)
2407 return 1;
2408 vcpu->arch.smbase = data;
2409 break;
2410 case MSR_IA32_TSC:
2411 kvm_write_tsc(vcpu, msr_info);
2412 break;
2413 case MSR_SMI_COUNT:
2414 if (!msr_info->host_initiated)
2415 return 1;
2416 vcpu->arch.smi_count = data;
2417 break;
2418 case MSR_KVM_WALL_CLOCK_NEW:
2419 case MSR_KVM_WALL_CLOCK:
2420 vcpu->kvm->arch.wall_clock = data;
2421 kvm_write_wall_clock(vcpu->kvm, data);
2422 break;
2423 case MSR_KVM_SYSTEM_TIME_NEW:
2424 case MSR_KVM_SYSTEM_TIME: {
2425 struct kvm_arch *ka = &vcpu->kvm->arch;
2426
2427 kvmclock_reset(vcpu);
2428
2429 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2430 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2431
2432 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2433 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2434
2435 ka->boot_vcpu_runs_old_kvmclock = tmp;
2436 }
2437
2438 vcpu->arch.time = data;
2439 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2440
2441
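		/* Bit 0 is the enable bit; if clear, kvmclock stays disabled. */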
2442 if (!(data & 1))
2443 break;
2444
2445 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2446 &vcpu->arch.pv_time, data & ~1ULL,
2447 sizeof(struct pvclock_vcpu_time_info)))
2448 vcpu->arch.pv_time_enabled = false;
2449 else
2450 vcpu->arch.pv_time_enabled = true;
2451
2452 break;
2453 }
2454 case MSR_KVM_ASYNC_PF_EN:
2455 if (kvm_pv_enable_async_pf(vcpu, data))
2456 return 1;
2457 break;
2458 case MSR_KVM_STEAL_TIME:
2459
2460 if (unlikely(!sched_info_on()))
2461 return 1;
2462
2463 if (data & KVM_STEAL_RESERVED_MASK)
2464 return 1;
2465
2466 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2467 data & KVM_STEAL_VALID_BITS,
2468 sizeof(struct kvm_steal_time)))
2469 return 1;
2470
2471 vcpu->arch.st.msr_val = data;
2472
2473 if (!(data & KVM_MSR_ENABLED))
2474 break;
2475
2476 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2477
2478 break;
2479 case MSR_KVM_PV_EOI_EN:
2480 if (kvm_lapic_enable_pv_eoi(vcpu, data))
2481 return 1;
2482 break;
2483
2484 case MSR_IA32_MCG_CTL:
2485 case MSR_IA32_MCG_STATUS:
2486 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2487 return set_msr_mce(vcpu, msr_info);
2488
2489 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2490 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2491 pr = true;
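		/* fall through */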
2492 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2493 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2494 if (kvm_pmu_is_valid_msr(vcpu, msr))
2495 return kvm_pmu_set_msr(vcpu, msr_info);
2496
2497 if (pr || data != 0)
2498 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2499 "0x%x data 0x%llx\n", msr, data);
2500 break;
2501 case MSR_K7_CLK_CTL:
2502
2503
2504
2505
2506
2507
2508
2509
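		/*
		 * Ignore writes to this no-longer-documented, pre-SVM K7
		 * MSR: guests may still apply AMD's recommended workaround
		 * even on models that do not need it, so just accept it.
		 */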
2510 break;
2511 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2512 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2513 case HV_X64_MSR_CRASH_CTL:
2514 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2515 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2516 case HV_X64_MSR_TSC_EMULATION_CONTROL:
2517 case HV_X64_MSR_TSC_EMULATION_STATUS:
2518 return kvm_hv_set_msr_common(vcpu, msr, data,
2519 msr_info->host_initiated);
2520 case MSR_IA32_BBL_CR_CTL3:
2521
2522
2523
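		/*
		 * Drop writes to this legacy MSR; see the corresponding
		 * rdmsr case for why a fixed value is reported instead.
		 */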
2524 if (report_ignored_msrs)
2525 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
2526 msr, data);
2527 break;
2528 case MSR_AMD64_OSVW_ID_LENGTH:
2529 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2530 return 1;
2531 vcpu->arch.osvw.length = data;
2532 break;
2533 case MSR_AMD64_OSVW_STATUS:
2534 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2535 return 1;
2536 vcpu->arch.osvw.status = data;
2537 break;
2538 case MSR_PLATFORM_INFO:
2539 if (!msr_info->host_initiated ||
2540 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
2541 cpuid_fault_enabled(vcpu)))
2542 return 1;
2543 vcpu->arch.msr_platform_info = data;
2544 break;
2545 case MSR_MISC_FEATURES_ENABLES:
2546 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
2547 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
2548 !supports_cpuid_fault(vcpu)))
2549 return 1;
2550 vcpu->arch.msr_misc_features_enables = data;
2551 break;
2552 default:
2553 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2554 return xen_hvm_config(vcpu, data);
2555 if (kvm_pmu_is_valid_msr(vcpu, msr))
2556 return kvm_pmu_set_msr(vcpu, msr_info);
2557 if (!ignore_msrs) {
2558 vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
2559 msr, data);
2560 return 1;
2561 } else {
2562 if (report_ignored_msrs)
2563 vcpu_unimpl(vcpu,
2564 "ignored wrmsr: 0x%x data 0x%llx\n",
2565 msr, data);
2566 break;
2567 }
2568 }
2569 return 0;
2570}
2571EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2572
2573
2574
2575
2576
2577
2578
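/*
 * Read the MSR specified by msr->index into msr->data.
 *
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */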
2579int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2580{
2581 return kvm_x86_ops->get_msr(vcpu, msr);
2582}
2583EXPORT_SYMBOL_GPL(kvm_get_msr);
2584
2585static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
2586{
2587 u64 data;
2588 u64 mcg_cap = vcpu->arch.mcg_cap;
2589 unsigned bank_num = mcg_cap & 0xff;
2590
2591 switch (msr) {
2592 case MSR_IA32_P5_MC_ADDR:
2593 case MSR_IA32_P5_MC_TYPE:
2594 data = 0;
2595 break;
2596 case MSR_IA32_MCG_CAP:
2597 data = vcpu->arch.mcg_cap;
2598 break;
2599 case MSR_IA32_MCG_CTL:
2600 if (!(mcg_cap & MCG_CTL_P) && !host)
2601 return 1;
2602 data = vcpu->arch.mcg_ctl;
2603 break;
2604 case MSR_IA32_MCG_STATUS:
2605 data = vcpu->arch.mcg_status;
2606 break;
2607 default:
2608 if (msr >= MSR_IA32_MC0_CTL &&
2609 msr < MSR_IA32_MCx_CTL(bank_num)) {
2610 u32 offset = msr - MSR_IA32_MC0_CTL;
2611 data = vcpu->arch.mce_banks[offset];
2612 break;
2613 }
2614 return 1;
2615 }
2616 *pdata = data;
2617 return 0;
2618}
2619
2620int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2621{
2622 switch (msr_info->index) {
2623 case MSR_IA32_PLATFORM_ID:
2624 case MSR_IA32_EBL_CR_POWERON:
2625 case MSR_IA32_DEBUGCTLMSR:
2626 case MSR_IA32_LASTBRANCHFROMIP:
2627 case MSR_IA32_LASTBRANCHTOIP:
2628 case MSR_IA32_LASTINTFROMIP:
2629 case MSR_IA32_LASTINTTOIP:
2630 case MSR_K8_SYSCFG:
2631 case MSR_K8_TSEG_ADDR:
2632 case MSR_K8_TSEG_MASK:
2633 case MSR_K7_HWCR:
2634 case MSR_VM_HSAVE_PA:
2635 case MSR_K8_INT_PENDING_MSG:
2636 case MSR_AMD64_NB_CFG:
2637 case MSR_FAM10H_MMIO_CONF_BASE:
2638 case MSR_AMD64_BU_CFG2:
2639 case MSR_IA32_PERF_CTL:
2640 case MSR_AMD64_DC_CFG:
2641 msr_info->data = 0;
2642 break;
2643 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
2644 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2645 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2646 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2647 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2648 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2649 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2650 msr_info->data = 0;
2651 break;
2652 case MSR_IA32_UCODE_REV:
2653 msr_info->data = vcpu->arch.microcode_version;
2654 break;
2655 case MSR_IA32_TSC:
2656 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
2657 break;
2658 case MSR_MTRRcap:
2659 case 0x200 ... 0x2ff:
2660 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
2661 case 0xcd:
2662 msr_info->data = 3;
2663 break;
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
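	/*
	 * Legacy Pentium 4 bus-frequency MSR: report a fixed, conservative
	 * encoding so that guests probing it get a sane value rather than
	 * a fault.
	 */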
2675 case MSR_EBC_FREQUENCY_ID:
2676 msr_info->data = 1 << 24;
2677 break;
2678 case MSR_IA32_APICBASE:
2679 msr_info->data = kvm_get_apic_base(vcpu);
2680 break;
2681 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2682 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
2684 case MSR_IA32_TSCDEADLINE:
2685 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
2686 break;
2687 case MSR_IA32_TSC_ADJUST:
2688 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2689 break;
2690 case MSR_IA32_MISC_ENABLE:
2691 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
2692 break;
2693 case MSR_IA32_SMBASE:
2694 if (!msr_info->host_initiated)
2695 return 1;
2696 msr_info->data = vcpu->arch.smbase;
2697 break;
2698 case MSR_SMI_COUNT:
2699 msr_info->data = vcpu->arch.smi_count;
2700 break;
2701 case MSR_IA32_PERF_STATUS:
2702
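		/*
		 * Synthesize a plausible value: a TSC increment of 1000 per
		 * tick plus a CPU multiplier of 4 in the upper bits.
		 */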
2703 msr_info->data = 1000ULL;
2704
2705 msr_info->data |= (((uint64_t)4ULL) << 40);
2706 break;
2707 case MSR_EFER:
2708 msr_info->data = vcpu->arch.efer;
2709 break;
2710 case MSR_KVM_WALL_CLOCK:
2711 case MSR_KVM_WALL_CLOCK_NEW:
2712 msr_info->data = vcpu->kvm->arch.wall_clock;
2713 break;
2714 case MSR_KVM_SYSTEM_TIME:
2715 case MSR_KVM_SYSTEM_TIME_NEW:
2716 msr_info->data = vcpu->arch.time;
2717 break;
2718 case MSR_KVM_ASYNC_PF_EN:
2719 msr_info->data = vcpu->arch.apf.msr_val;
2720 break;
2721 case MSR_KVM_STEAL_TIME:
2722 msr_info->data = vcpu->arch.st.msr_val;
2723 break;
2724 case MSR_KVM_PV_EOI_EN:
2725 msr_info->data = vcpu->arch.pv_eoi.msr_val;
2726 break;
2727 case MSR_IA32_P5_MC_ADDR:
2728 case MSR_IA32_P5_MC_TYPE:
2729 case MSR_IA32_MCG_CAP:
2730 case MSR_IA32_MCG_CTL:
2731 case MSR_IA32_MCG_STATUS:
2732 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2733 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
2734 msr_info->host_initiated);
2735 case MSR_K7_CLK_CTL:
2736
2737
2738
2739
2740
2741
2742
2743
2744
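		/*
		 * Provide the expected ramp-up count for K7 and minimum
		 * divisors for everything else; a value of zero here is
		 * reported to lock up some AMD guests.
		 */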
2745 msr_info->data = 0x20000000;
2746 break;
2747 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2748 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2749 case HV_X64_MSR_CRASH_CTL:
2750 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2751 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2752 case HV_X64_MSR_TSC_EMULATION_CONTROL:
2753 case HV_X64_MSR_TSC_EMULATION_STATUS:
2754 return kvm_hv_get_msr_common(vcpu,
2755 msr_info->index, &msr_info->data,
2756 msr_info->host_initiated);
2758 case MSR_IA32_BBL_CR_CTL3:
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
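		/*
		 * Legacy MSR that exists but is only partially documented;
		 * some old guests (Windows XP era) are known to read it, so
		 * return a fixed best-effort value.
		 */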
2769 msr_info->data = 0xbe702111;
2770 break;
2771 case MSR_AMD64_OSVW_ID_LENGTH:
2772 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2773 return 1;
2774 msr_info->data = vcpu->arch.osvw.length;
2775 break;
2776 case MSR_AMD64_OSVW_STATUS:
2777 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2778 return 1;
2779 msr_info->data = vcpu->arch.osvw.status;
2780 break;
2781 case MSR_PLATFORM_INFO:
2782 if (!msr_info->host_initiated &&
2783 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
2784 return 1;
2785 msr_info->data = vcpu->arch.msr_platform_info;
2786 break;
2787 case MSR_MISC_FEATURES_ENABLES:
2788 msr_info->data = vcpu->arch.msr_misc_features_enables;
2789 break;
2790 default:
2791 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2792 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2793 if (!ignore_msrs) {
2794 vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
2795 msr_info->index);
2796 return 1;
2797 } else {
2798 if (report_ignored_msrs)
2799 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
2800 msr_info->index);
2801 msr_info->data = 0;
2802 }
2803 break;
2804 }
2805 return 0;
2806}
2807EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2808
2809
2810
2811
2812
2813
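/*
 * Read or write a bunch of msrs.  All parameters are kernel addresses.
 *
 * Returns the number of msrs processed; processing stops at the first
 * entry that do_msr() rejects.
 */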
2814static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2815 struct kvm_msr_entry *entries,
2816 int (*do_msr)(struct kvm_vcpu *vcpu,
2817 unsigned index, u64 *data))
2818{
2819 int i;
2820
2821 for (i = 0; i < msrs->nmsrs; ++i)
2822 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2823 break;
2824
2825 return i;
2826}
2827
2828
2829
2830
2831
2832
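/*
 * Read or write a bunch of msrs.  Parameters are user addresses.
 *
 * Returns the number of msrs processed or a negative error code.
 *
 * Illustrative only (not part of this file): userspace reaches this path
 * through the KVM_GET_MSRS/KVM_SET_MSRS vcpu ioctls with a buffer laid out
 * as a struct kvm_msrs header followed by nmsrs struct kvm_msr_entry slots
 * (layouts as defined by the uapi in <linux/kvm.h>), e.g.
 *
 *	int n = ioctl(vcpu_fd, KVM_GET_MSRS, buf);
 *
 * where a non-negative n is the number of entries actually processed.
 */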
2833static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2834 int (*do_msr)(struct kvm_vcpu *vcpu,
2835 unsigned index, u64 *data),
2836 int writeback)
2837{
2838 struct kvm_msrs msrs;
2839 struct kvm_msr_entry *entries;
2840 int r, n;
2841 unsigned size;
2842
2843 r = -EFAULT;
2844 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2845 goto out;
2846
2847 r = -E2BIG;
2848 if (msrs.nmsrs >= MAX_IO_MSRS)
2849 goto out;
2850
2851 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2852 entries = memdup_user(user_msrs->entries, size);
2853 if (IS_ERR(entries)) {
2854 r = PTR_ERR(entries);
2855 goto out;
2856 }
2857
2858 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2859 if (r < 0)
2860 goto out_free;
2861
2862 r = -EFAULT;
2863 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2864 goto out_free;
2865
2866 r = n;
2867
2868out_free:
2869 kfree(entries);
2870out:
2871 return r;
2872}
2873
2874static inline bool kvm_can_mwait_in_guest(void)
2875{
2876 return boot_cpu_has(X86_FEATURE_MWAIT) &&
2877 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
2878 boot_cpu_has(X86_FEATURE_ARAT);
2879}
2880
2881int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2882{
2883 int r = 0;
2884
2885 switch (ext) {
2886 case KVM_CAP_IRQCHIP:
2887 case KVM_CAP_HLT:
2888 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2889 case KVM_CAP_SET_TSS_ADDR:
2890 case KVM_CAP_EXT_CPUID:
2891 case KVM_CAP_EXT_EMUL_CPUID:
2892 case KVM_CAP_CLOCKSOURCE:
2893 case KVM_CAP_PIT:
2894 case KVM_CAP_NOP_IO_DELAY:
2895 case KVM_CAP_MP_STATE:
2896 case KVM_CAP_SYNC_MMU:
2897 case KVM_CAP_USER_NMI:
2898 case KVM_CAP_REINJECT_CONTROL:
2899 case KVM_CAP_IRQ_INJECT_STATUS:
2900 case KVM_CAP_IOEVENTFD:
2901 case KVM_CAP_IOEVENTFD_NO_LENGTH:
2902 case KVM_CAP_PIT2:
2903 case KVM_CAP_PIT_STATE2:
2904 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2905 case KVM_CAP_XEN_HVM:
2906 case KVM_CAP_VCPU_EVENTS:
2907 case KVM_CAP_HYPERV:
2908 case KVM_CAP_HYPERV_VAPIC:
2909 case KVM_CAP_HYPERV_SPIN:
2910 case KVM_CAP_HYPERV_SYNIC:
2911 case KVM_CAP_HYPERV_SYNIC2:
2912 case KVM_CAP_HYPERV_VP_INDEX:
2913 case KVM_CAP_HYPERV_EVENTFD:
2914 case KVM_CAP_HYPERV_TLBFLUSH:
2915 case KVM_CAP_PCI_SEGMENT:
2916 case KVM_CAP_DEBUGREGS:
2917 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2918 case KVM_CAP_XSAVE:
2919 case KVM_CAP_ASYNC_PF:
2920 case KVM_CAP_GET_TSC_KHZ:
2921 case KVM_CAP_KVMCLOCK_CTRL:
2922 case KVM_CAP_READONLY_MEM:
2923 case KVM_CAP_HYPERV_TIME:
2924 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2925 case KVM_CAP_TSC_DEADLINE_TIMER:
2926 case KVM_CAP_ENABLE_CAP_VM:
2927 case KVM_CAP_DISABLE_QUIRKS:
2928 case KVM_CAP_SET_BOOT_CPU_ID:
2929 case KVM_CAP_SPLIT_IRQCHIP:
2930 case KVM_CAP_IMMEDIATE_EXIT:
2931 case KVM_CAP_GET_MSR_FEATURES:
2932 case KVM_CAP_MSR_PLATFORM_INFO:
2933 r = 1;
2934 break;
2935 case KVM_CAP_SYNC_REGS:
2936 r = KVM_SYNC_X86_VALID_FIELDS;
2937 break;
2938 case KVM_CAP_ADJUST_CLOCK:
2939 r = KVM_CLOCK_TSC_STABLE;
2940 break;
2941 case KVM_CAP_X86_DISABLE_EXITS:
2942 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
		if (kvm_can_mwait_in_guest())
2944 r |= KVM_X86_DISABLE_EXITS_MWAIT;
2945 break;
2946 case KVM_CAP_X86_SMM:
2947
2948
2949
2950
2951
2952
2953
2954
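		/*
		 * Report SMM only if the vendor module can emulate the
		 * SMBASE MSR, which the SMM state-save and migration paths
		 * depend on.
		 */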
2955 r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
2956 break;
2957 case KVM_CAP_VAPIC:
2958 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2959 break;
2960 case KVM_CAP_NR_VCPUS:
2961 r = KVM_SOFT_MAX_VCPUS;
2962 break;
2963 case KVM_CAP_MAX_VCPUS:
2964 r = KVM_MAX_VCPUS;
2965 break;
2966 case KVM_CAP_NR_MEMSLOTS:
2967 r = KVM_USER_MEM_SLOTS;
2968 break;
2969 case KVM_CAP_PV_MMU:
2970 r = 0;
2971 break;
2972 case KVM_CAP_MCE:
2973 r = KVM_MAX_MCE_BANKS;
2974 break;
2975 case KVM_CAP_XCRS:
2976 r = boot_cpu_has(X86_FEATURE_XSAVE);
2977 break;
2978 case KVM_CAP_TSC_CONTROL:
2979 r = kvm_has_tsc_control;
2980 break;
2981 case KVM_CAP_X2APIC_API:
2982 r = KVM_X2APIC_API_VALID_FLAGS;
2983 break;
2984 case KVM_CAP_NESTED_STATE:
2985 r = kvm_x86_ops->get_nested_state ?
2986 kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
2987 break;
2988 default:
2989 break;
2990 }
2991 return r;
2992
2993}
2994
2995long kvm_arch_dev_ioctl(struct file *filp,
2996 unsigned int ioctl, unsigned long arg)
2997{
2998 void __user *argp = (void __user *)arg;
2999 long r;
3000
3001 switch (ioctl) {
3002 case KVM_GET_MSR_INDEX_LIST: {
3003 struct kvm_msr_list __user *user_msr_list = argp;
3004 struct kvm_msr_list msr_list;
3005 unsigned n;
3006
3007 r = -EFAULT;
3008 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
3009 goto out;
3010 n = msr_list.nmsrs;
3011 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
3012 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
3013 goto out;
3014 r = -E2BIG;
3015 if (n < msr_list.nmsrs)
3016 goto out;
3017 r = -EFAULT;
3018 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3019 num_msrs_to_save * sizeof(u32)))
3020 goto out;
3021 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
3022 &emulated_msrs,
3023 num_emulated_msrs * sizeof(u32)))
3024 goto out;
3025 r = 0;
3026 break;
3027 }
3028 case KVM_GET_SUPPORTED_CPUID:
3029 case KVM_GET_EMULATED_CPUID: {
3030 struct kvm_cpuid2 __user *cpuid_arg = argp;
3031 struct kvm_cpuid2 cpuid;
3032
3033 r = -EFAULT;
3034 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3035 goto out;
3036
3037 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
3038 ioctl);
3039 if (r)
3040 goto out;
3041
3042 r = -EFAULT;
3043 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3044 goto out;
3045 r = 0;
3046 break;
3047 }
3048 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
3049 r = -EFAULT;
3050 if (copy_to_user(argp, &kvm_mce_cap_supported,
3051 sizeof(kvm_mce_cap_supported)))
3052 goto out;
3053 r = 0;
		break;
	}
	case KVM_GET_MSR_FEATURE_INDEX_LIST: {
3056 struct kvm_msr_list __user *user_msr_list = argp;
3057 struct kvm_msr_list msr_list;
3058 unsigned int n;
3059
3060 r = -EFAULT;
3061 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3062 goto out;
3063 n = msr_list.nmsrs;
3064 msr_list.nmsrs = num_msr_based_features;
3065 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3066 goto out;
3067 r = -E2BIG;
3068 if (n < msr_list.nmsrs)
3069 goto out;
3070 r = -EFAULT;
3071 if (copy_to_user(user_msr_list->indices, &msr_based_features,
3072 num_msr_based_features * sizeof(u32)))
3073 goto out;
3074 r = 0;
3075 break;
3076 }
3077 case KVM_GET_MSRS:
3078 r = msr_io(NULL, argp, do_get_msr_feature, 1);
3079 break;
3081 default:
3082 r = -EINVAL;
3083 }
3084out:
3085 return r;
3086}
3087
3088static void wbinvd_ipi(void *garbage)
3089{
3090 wbinvd();
3091}
3092
3093static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
3094{
3095 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
3096}
3097
3098void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3099{
3100
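	/*
	 * A guest WBINVD may need to be emulated when the VM has
	 * non-coherent DMA assigned: either track this CPU as dirty or
	 * flush the caches of the CPU the vCPU last ran on.
	 */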
3101 if (need_emulate_wbinvd(vcpu)) {
3102 if (kvm_x86_ops->has_wbinvd_exit())
3103 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
3104 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
3105 smp_call_function_single(vcpu->cpu,
3106 wbinvd_ipi, NULL, 1);
3107 }
3108
3109 kvm_x86_ops->vcpu_load(vcpu, cpu);
3110
3111
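	/* Apply any externally detected TSC adjustments (e.g. due to host suspend). */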
3112 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
3113 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
3114 vcpu->arch.tsc_offset_adjustment = 0;
3115 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3116 }
3117
3118 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
3119 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
3120 rdtsc() - vcpu->arch.last_host_tsc;
3121 if (tsc_delta < 0)
3122 mark_tsc_unstable("KVM discovered backwards TSC");
3123
3124 if (kvm_check_tsc_unstable()) {
3125 u64 offset = kvm_compute_tsc_offset(vcpu,
3126 vcpu->arch.last_guest_tsc);
3127 kvm_vcpu_write_tsc_offset(vcpu, offset);
3128 vcpu->arch.tsc_catchup = 1;
3129 }
3130
3131 if (kvm_lapic_hv_timer_in_use(vcpu))
3132 kvm_lapic_restart_hv_timer(vcpu);
3133
3134
3135
3136
3137
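		/*
		 * On a host with a stable, synchronized TSC there is no
		 * need to update kvmclock on vcpu->cpu migration.
		 */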
3138 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
3139 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
3140 if (vcpu->cpu != cpu)
3141 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
3142 vcpu->cpu = cpu;
3143 }
3144
3145 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3146}
3147
3148static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
3149{
3150 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3151 return;
3152
3153 vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
3154
3155 kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
3156 &vcpu->arch.st.steal.preempted,
3157 offsetof(struct kvm_steal_time, preempted),
3158 sizeof(vcpu->arch.st.steal.preempted));
3159}
3160
3161void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3162{
3163 int idx;
3164
3165 if (vcpu->preempted)
3166 vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
3167
3168
3169
3170
3171
3172
3173
3174
3175
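	/*
	 * Disable page faults: we are in atomic context here and
	 * kvm_steal_time_set_preempted() ends up in
	 * kvm_write_guest_offset_cached(), which may fault.  If the cached
	 * mapping is not resident the update is simply skipped.
	 */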
3176 pagefault_disable();
3177
3178
3179
3180
3181 idx = srcu_read_lock(&vcpu->kvm->srcu);
3182 kvm_steal_time_set_preempted(vcpu);
3183 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3184 pagefault_enable();
3185 kvm_x86_ops->vcpu_put(vcpu);
3186 vcpu->arch.last_host_tsc = rdtsc();
3187
3188
3189
3190
3191
3192 set_debugreg(0, 6);
3193}
3194
3195static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
3196 struct kvm_lapic_state *s)
3197{
3198 if (vcpu->arch.apicv_active)
3199 kvm_x86_ops->sync_pir_to_irr(vcpu);
3200
3201 return kvm_apic_get_state(vcpu, s);
3202}
3203
3204static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
3205 struct kvm_lapic_state *s)
3206{
3207 int r;
3208
3209 r = kvm_apic_set_state(vcpu, s);
3210 if (r)
3211 return r;
3212 update_cr8_intercept(vcpu);
3213
3214 return 0;
3215}
3216
3217static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
3218{
3219 return (!lapic_in_kernel(vcpu) ||
3220 kvm_apic_accept_pic_intr(vcpu));
3221}
3222
3223
3224
3225
3226
3227
3228
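/*
 * Check whether userspace may inject an external interrupt right now:
 * interrupts must be allowed for the guest, nothing may already be queued
 * or pending reinjection, and the in-kernel LAPIC (if any) must accept
 * PIC interrupts.
 */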
3229static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
3230{
3231 return kvm_arch_interrupt_allowed(vcpu) &&
3232 !kvm_cpu_has_interrupt(vcpu) &&
3233 !kvm_event_needs_reinjection(vcpu) &&
3234 kvm_cpu_accept_dm_intr(vcpu);
3235}
3236
3237static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
3238 struct kvm_interrupt *irq)
3239{
3240 if (irq->irq >= KVM_NR_INTERRUPTS)
3241 return -EINVAL;
3242
3243 if (!irqchip_in_kernel(vcpu->kvm)) {
3244 kvm_queue_interrupt(vcpu, irq->irq, false);
3245 kvm_make_request(KVM_REQ_EVENT, vcpu);
3246 return 0;
3247 }
3248
3249
3250
3251
3252
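	/*
	 * With an in-kernel LAPIC this path is only used to inject EXTINT,
	 * so fail if the 8259 is also in the kernel.
	 */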
3253 if (pic_in_kernel(vcpu->kvm))
3254 return -ENXIO;
3255
3256 if (vcpu->arch.pending_external_vector != -1)
3257 return -EEXIST;
3258
3259 vcpu->arch.pending_external_vector = irq->irq;
3260 kvm_make_request(KVM_REQ_EVENT, vcpu);
3261 return 0;
3262}
3263
3264static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
3265{
3266 kvm_inject_nmi(vcpu);
3267
3268 return 0;
3269}
3270
3271static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
3272{
3273 kvm_make_request(KVM_REQ_SMI, vcpu);
3274
3275 return 0;
3276}
3277
3278static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
3279 struct kvm_tpr_access_ctl *tac)
3280{
3281 if (tac->flags)
3282 return -EINVAL;
3283 vcpu->arch.tpr_access_reporting = !!tac->enabled;
3284 return 0;
3285}
3286
3287static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
3288 u64 mcg_cap)
3289{
3290 int r;
3291 unsigned bank_num = mcg_cap & 0xff, bank;
3292
3293 r = -EINVAL;
3294 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
3295 goto out;
3296 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
3297 goto out;
3298 r = 0;
3299 vcpu->arch.mcg_cap = mcg_cap;
3300
3301 if (mcg_cap & MCG_CTL_P)
3302 vcpu->arch.mcg_ctl = ~(u64)0;
3303
3304 for (bank = 0; bank < bank_num; bank++)
3305 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
3306
3307 if (kvm_x86_ops->setup_mce)
3308 kvm_x86_ops->setup_mce(vcpu);
3309out:
3310 return r;
3311}
3312
3313static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
3314 struct kvm_x86_mce *mce)
3315{
3316 u64 mcg_cap = vcpu->arch.mcg_cap;
3317 unsigned bank_num = mcg_cap & 0xff;
3318 u64 *banks = vcpu->arch.mce_banks;
3319
3320 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
3321 return -EINVAL;
3322
3323
3324
3325
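	/*
	 * If IA32_MCG_CTL is not all 1s, uncorrected error reporting is
	 * disabled, so silently drop the error.
	 */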
3326 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
3327 vcpu->arch.mcg_ctl != ~(u64)0)
3328 return 0;
3329 banks += 4 * mce->bank;
3330
3331
3332
3333
3334 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
3335 return 0;
3336 if (mce->status & MCI_STATUS_UC) {
3337 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3338 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3339 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3340 return 0;
3341 }
3342 if (banks[1] & MCI_STATUS_VAL)
3343 mce->status |= MCI_STATUS_OVER;
3344 banks[2] = mce->addr;
3345 banks[3] = mce->misc;
3346 vcpu->arch.mcg_status = mce->mcg_status;
3347 banks[1] = mce->status;
3348 kvm_queue_exception(vcpu, MC_VECTOR);
3349 } else if (!(banks[1] & MCI_STATUS_VAL)
3350 || !(banks[1] & MCI_STATUS_UC)) {
3351 if (banks[1] & MCI_STATUS_VAL)
3352 mce->status |= MCI_STATUS_OVER;
3353 banks[2] = mce->addr;
3354 banks[3] = mce->misc;
3355 banks[1] = mce->status;
3356 } else
3357 banks[1] |= MCI_STATUS_OVER;
3358 return 0;
3359}
3360
3361static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3362 struct kvm_vcpu_events *events)
3363{
3364 process_nmi(vcpu);
3365
3366
3367
3368
3369
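	/*
	 * FIXME: report pending and injected exceptions separately once
	 * userspace can consume them; for now a pending exception is folded
	 * into the injected bit.
	 */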
3370 events->exception.injected =
3371 (vcpu->arch.exception.pending ||
3372 vcpu->arch.exception.injected) &&
3373 !kvm_exception_is_soft(vcpu->arch.exception.nr);
3374 events->exception.nr = vcpu->arch.exception.nr;
3375 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
3376 events->exception.pad = 0;
3377 events->exception.error_code = vcpu->arch.exception.error_code;
3378
3379 events->interrupt.injected =
3380 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
3381 events->interrupt.nr = vcpu->arch.interrupt.nr;
3382 events->interrupt.soft = 0;
3383 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
3384
3385 events->nmi.injected = vcpu->arch.nmi_injected;
3386 events->nmi.pending = vcpu->arch.nmi_pending != 0;
3387 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
3388 events->nmi.pad = 0;
3389
3390 events->sipi_vector = 0;
3391
3392 events->smi.smm = is_smm(vcpu);
3393 events->smi.pending = vcpu->arch.smi_pending;
3394 events->smi.smm_inside_nmi =
3395 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
3396 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
3397
3398 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
3399 | KVM_VCPUEVENT_VALID_SHADOW
3400 | KVM_VCPUEVENT_VALID_SMM);
3401 memset(&events->reserved, 0, sizeof(events->reserved));
3402}
3403
3404static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
3405
3406static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3407 struct kvm_vcpu_events *events)
3408{
3409 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3410 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3411 | KVM_VCPUEVENT_VALID_SHADOW
3412 | KVM_VCPUEVENT_VALID_SMM))
3413 return -EINVAL;
3414
3415 if (events->exception.injected &&
3416 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
3417 is_guest_mode(vcpu)))
3418 return -EINVAL;
3419
3420
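	/* INITs are latched while the vCPU is in SMM. */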
3421 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
3422 (events->smi.smm || events->smi.pending) &&
3423 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3424 return -EINVAL;
3425
3426 process_nmi(vcpu);
3427 vcpu->arch.exception.injected = false;
3428 vcpu->arch.exception.pending = events->exception.injected;
3429 vcpu->arch.exception.nr = events->exception.nr;
3430 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3431 vcpu->arch.exception.error_code = events->exception.error_code;
3432
3433 vcpu->arch.interrupt.injected = events->interrupt.injected;
3434 vcpu->arch.interrupt.nr = events->interrupt.nr;
3435 vcpu->arch.interrupt.soft = events->interrupt.soft;
3436 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3437 kvm_x86_ops->set_interrupt_shadow(vcpu,
3438 events->interrupt.shadow);
3439
3440 vcpu->arch.nmi_injected = events->nmi.injected;
3441 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3442 vcpu->arch.nmi_pending = events->nmi.pending;
3443 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3444
3445 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3446 lapic_in_kernel(vcpu))
3447 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3448
3449 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
3450 u32 hflags = vcpu->arch.hflags;
3451 if (events->smi.smm)
3452 hflags |= HF_SMM_MASK;
3453 else
3454 hflags &= ~HF_SMM_MASK;
3455 kvm_set_hflags(vcpu, hflags);
3456
3457 vcpu->arch.smi_pending = events->smi.pending;
3458
3459 if (events->smi.smm) {
3460 if (events->smi.smm_inside_nmi)
3461 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3462 else
3463 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3464 if (lapic_in_kernel(vcpu)) {
3465 if (events->smi.latched_init)
3466 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3467 else
3468 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3469 }
3470 }
3471 }
3472
3473 kvm_make_request(KVM_REQ_EVENT, vcpu);
3474
3475 return 0;
3476}
3477
3478static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3479 struct kvm_debugregs *dbgregs)
3480{
3481 unsigned long val;
3482
3483 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3484 kvm_get_dr(vcpu, 6, &val);
3485 dbgregs->dr6 = val;
3486 dbgregs->dr7 = vcpu->arch.dr7;
3487 dbgregs->flags = 0;
3488 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3489}
3490
3491static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3492 struct kvm_debugregs *dbgregs)
3493{
3494 if (dbgregs->flags)
3495 return -EINVAL;
3496
3497 if (dbgregs->dr6 & ~0xffffffffull)
3498 return -EINVAL;
3499 if (dbgregs->dr7 & ~0xffffffffull)
3500 return -EINVAL;
3501
3502 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3503 kvm_update_dr0123(vcpu);
3504 vcpu->arch.dr6 = dbgregs->dr6;
3505 kvm_update_dr6(vcpu);
3506 vcpu->arch.dr7 = dbgregs->dr7;
3507 kvm_update_dr7(vcpu);
3508
3509 return 0;
3510}
3511
3512#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3513
3514static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3515{
3516 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3517 u64 xstate_bv = xsave->header.xfeatures;
3518 u64 valid;
3519
3520
3521
3522
3523
3524 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
3525
3526
3527 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
3528 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
3529
3530
3531
3532
3533
3534 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3535 while (valid) {
3536 u64 feature = valid & -valid;
3537 int index = fls64(feature) - 1;
3538 void *src = get_xsave_addr(xsave, feature);
3539
3540 if (src) {
3541 u32 size, offset, ecx, edx;
3542 cpuid_count(XSTATE_CPUID, index,
3543 &size, &offset, &ecx, &edx);
3544 if (feature == XFEATURE_MASK_PKRU)
3545 memcpy(dest + offset, &vcpu->arch.pkru,
3546 sizeof(vcpu->arch.pkru));
3547 else
3548 memcpy(dest + offset, src, size);
3549
3550 }
3551
3552 valid -= feature;
3553 }
3554}
3555
3556static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3557{
3558 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3559 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3560 u64 valid;
3561
3562
3563
3564
3565
3566 memcpy(xsave, src, XSAVE_HDR_OFFSET);
3567
3568
3569 xsave->header.xfeatures = xstate_bv;
3570 if (boot_cpu_has(X86_FEATURE_XSAVES))
3571 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
3572
3573
3574
3575
3576
3577 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3578 while (valid) {
3579 u64 feature = valid & -valid;
3580 int index = fls64(feature) - 1;
3581 void *dest = get_xsave_addr(xsave, feature);
3582
3583 if (dest) {
3584 u32 size, offset, ecx, edx;
3585 cpuid_count(XSTATE_CPUID, index,
3586 &size, &offset, &ecx, &edx);
3587 if (feature == XFEATURE_MASK_PKRU)
3588 memcpy(&vcpu->arch.pkru, src + offset,
3589 sizeof(vcpu->arch.pkru));
3590 else
3591 memcpy(dest, src + offset, size);
3592 }
3593
3594 valid -= feature;
3595 }
3596}
3597
3598static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3599 struct kvm_xsave *guest_xsave)
3600{
3601 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3602 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
3603 fill_xsave((u8 *) guest_xsave->region, vcpu);
3604 } else {
3605 memcpy(guest_xsave->region,
3606 &vcpu->arch.guest_fpu.state.fxsave,
3607 sizeof(struct fxregs_state));
3608 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3609 XFEATURE_MASK_FPSSE;
3610 }
3611}
3612
3613#define XSAVE_MXCSR_OFFSET 24
3614
3615static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3616 struct kvm_xsave *guest_xsave)
3617{
3618 u64 xstate_bv =
3619 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3620 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
3621
3622 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3623
3624
3625
3626
3627
3628 if (xstate_bv & ~kvm_supported_xcr0() ||
3629 mxcsr & ~mxcsr_feature_mask)
3630 return -EINVAL;
3631 load_xsave(vcpu, (u8 *)guest_xsave->region);
3632 } else {
3633 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
3634 mxcsr & ~mxcsr_feature_mask)
3635 return -EINVAL;
3636 memcpy(&vcpu->arch.guest_fpu.state.fxsave,
3637 guest_xsave->region, sizeof(struct fxregs_state));
3638 }
3639 return 0;
3640}
3641
3642static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3643 struct kvm_xcrs *guest_xcrs)
3644{
3645 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
3646 guest_xcrs->nr_xcrs = 0;
3647 return;
3648 }
3649
3650 guest_xcrs->nr_xcrs = 1;
3651 guest_xcrs->flags = 0;
3652 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3653 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3654}
3655
3656static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3657 struct kvm_xcrs *guest_xcrs)
3658{
3659 int i, r = 0;
3660
3661 if (!boot_cpu_has(X86_FEATURE_XSAVE))
3662 return -EINVAL;
3663
3664 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3665 return -EINVAL;
3666
3667 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3668
3669 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3670 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3671 guest_xcrs->xcrs[i].value);
3672 break;
3673 }
3674 if (r)
3675 r = -EINVAL;
3676 return r;
3677}
3678
3679
3680
3681
3682
3683
3684
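/*
 * Tell the guest's kvmclock, via the next clock update, that the VM was
 * stopped by the host so the guest can suppress soft-lockup and similar
 * watchdog warnings.  Fails if pvclock is not enabled for this vCPU.
 */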
3685static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3686{
3687 if (!vcpu->arch.pv_time_enabled)
3688 return -EINVAL;
3689 vcpu->arch.pvclock_set_guest_stopped_request = true;
3690 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3691 return 0;
3692}
3693
3694static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3695 struct kvm_enable_cap *cap)
3696{
3697 if (cap->flags)
3698 return -EINVAL;
3699
3700 switch (cap->cap) {
3701 case KVM_CAP_HYPERV_SYNIC2:
3702 if (cap->args[0])
3703 return -EINVAL;
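		/* fall through */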
3704 case KVM_CAP_HYPERV_SYNIC:
3705 if (!irqchip_in_kernel(vcpu->kvm))
3706 return -EINVAL;
3707 return kvm_hv_activate_synic(vcpu, cap->cap ==
3708 KVM_CAP_HYPERV_SYNIC2);
3709 default:
3710 return -EINVAL;
3711 }
3712}
3713
3714long kvm_arch_vcpu_ioctl(struct file *filp,
3715 unsigned int ioctl, unsigned long arg)
3716{
3717 struct kvm_vcpu *vcpu = filp->private_data;
3718 void __user *argp = (void __user *)arg;
3719 int r;
3720 union {
3721 struct kvm_lapic_state *lapic;
3722 struct kvm_xsave *xsave;
3723 struct kvm_xcrs *xcrs;
3724 void *buffer;
3725 } u;
3726
3727 vcpu_load(vcpu);
3728
3729 u.buffer = NULL;
3730 switch (ioctl) {
3731 case KVM_GET_LAPIC: {
3732 r = -EINVAL;
3733 if (!lapic_in_kernel(vcpu))
3734 goto out;
3735 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3736
3737 r = -ENOMEM;
3738 if (!u.lapic)
3739 goto out;
3740 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3741 if (r)
3742 goto out;
3743 r = -EFAULT;
3744 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3745 goto out;
3746 r = 0;
3747 break;
3748 }
3749 case KVM_SET_LAPIC: {
3750 r = -EINVAL;
3751 if (!lapic_in_kernel(vcpu))
3752 goto out;
3753 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3754 if (IS_ERR(u.lapic)) {
3755 r = PTR_ERR(u.lapic);
3756 goto out_nofree;
3757 }
3758
3759 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3760 break;
3761 }
3762 case KVM_INTERRUPT: {
3763 struct kvm_interrupt irq;
3764
3765 r = -EFAULT;
3766 if (copy_from_user(&irq, argp, sizeof irq))
3767 goto out;
3768 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3769 break;
3770 }
3771 case KVM_NMI: {
3772 r = kvm_vcpu_ioctl_nmi(vcpu);
3773 break;
3774 }
3775 case KVM_SMI: {
3776 r = kvm_vcpu_ioctl_smi(vcpu);
3777 break;
3778 }
3779 case KVM_SET_CPUID: {
3780 struct kvm_cpuid __user *cpuid_arg = argp;
3781 struct kvm_cpuid cpuid;
3782
3783 r = -EFAULT;
3784 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3785 goto out;
3786 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3787 break;
3788 }
3789 case KVM_SET_CPUID2: {
3790 struct kvm_cpuid2 __user *cpuid_arg = argp;
3791 struct kvm_cpuid2 cpuid;
3792
3793 r = -EFAULT;
3794 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3795 goto out;
3796 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3797 cpuid_arg->entries);
3798 break;
3799 }
3800 case KVM_GET_CPUID2: {
3801 struct kvm_cpuid2 __user *cpuid_arg = argp;
3802 struct kvm_cpuid2 cpuid;
3803
3804 r = -EFAULT;
3805 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3806 goto out;
3807 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3808 cpuid_arg->entries);
3809 if (r)
3810 goto out;
3811 r = -EFAULT;
3812 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3813 goto out;
3814 r = 0;
3815 break;
3816 }
3817 case KVM_GET_MSRS: {
3818 int idx = srcu_read_lock(&vcpu->kvm->srcu);
3819 r = msr_io(vcpu, argp, do_get_msr, 1);
3820 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3821 break;
3822 }
3823 case KVM_SET_MSRS: {
3824 int idx = srcu_read_lock(&vcpu->kvm->srcu);
3825 r = msr_io(vcpu, argp, do_set_msr, 0);
3826 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3827 break;
3828 }
3829 case KVM_TPR_ACCESS_REPORTING: {
3830 struct kvm_tpr_access_ctl tac;
3831
3832 r = -EFAULT;
3833 if (copy_from_user(&tac, argp, sizeof tac))
3834 goto out;
3835 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3836 if (r)
3837 goto out;
3838 r = -EFAULT;
3839 if (copy_to_user(argp, &tac, sizeof tac))
3840 goto out;
3841 r = 0;
3842 break;
	}
3844 case KVM_SET_VAPIC_ADDR: {
3845 struct kvm_vapic_addr va;
3846 int idx;
3847
3848 r = -EINVAL;
3849 if (!lapic_in_kernel(vcpu))
3850 goto out;
3851 r = -EFAULT;
3852 if (copy_from_user(&va, argp, sizeof va))
3853 goto out;
3854 idx = srcu_read_lock(&vcpu->kvm->srcu);
3855 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3856 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3857 break;
3858 }
3859 case KVM_X86_SETUP_MCE: {
3860 u64 mcg_cap;
3861
3862 r = -EFAULT;
3863 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3864 goto out;
3865 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3866 break;
3867 }
3868 case KVM_X86_SET_MCE: {
3869 struct kvm_x86_mce mce;
3870
3871 r = -EFAULT;
3872 if (copy_from_user(&mce, argp, sizeof mce))
3873 goto out;
3874 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3875 break;
3876 }
3877 case KVM_GET_VCPU_EVENTS: {
3878 struct kvm_vcpu_events events;
3879
3880 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3881
3882 r = -EFAULT;
3883 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3884 break;
3885 r = 0;
3886 break;
3887 }
3888 case KVM_SET_VCPU_EVENTS: {
3889 struct kvm_vcpu_events events;
3890
3891 r = -EFAULT;
3892 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3893 break;
3894
3895 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3896 break;
3897 }
3898 case KVM_GET_DEBUGREGS: {
3899 struct kvm_debugregs dbgregs;
3900
3901 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3902
3903 r = -EFAULT;
3904 if (copy_to_user(argp, &dbgregs,
3905 sizeof(struct kvm_debugregs)))
3906 break;
3907 r = 0;
3908 break;
3909 }
3910 case KVM_SET_DEBUGREGS: {
3911 struct kvm_debugregs dbgregs;
3912
3913 r = -EFAULT;
3914 if (copy_from_user(&dbgregs, argp,
3915 sizeof(struct kvm_debugregs)))
3916 break;
3917
3918 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3919 break;
3920 }
3921 case KVM_GET_XSAVE: {
3922 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3923 r = -ENOMEM;
3924 if (!u.xsave)
3925 break;
3926
3927 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3928
3929 r = -EFAULT;
3930 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3931 break;
3932 r = 0;
3933 break;
3934 }
3935 case KVM_SET_XSAVE: {
3936 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3937 if (IS_ERR(u.xsave)) {
3938 r = PTR_ERR(u.xsave);
3939 goto out_nofree;
3940 }
3941
3942 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3943 break;
3944 }
3945 case KVM_GET_XCRS: {
3946 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3947 r = -ENOMEM;
3948 if (!u.xcrs)
3949 break;
3950
3951 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3952
3953 r = -EFAULT;
3954 if (copy_to_user(argp, u.xcrs,
3955 sizeof(struct kvm_xcrs)))
3956 break;
3957 r = 0;
3958 break;
3959 }
3960 case KVM_SET_XCRS: {
3961 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3962 if (IS_ERR(u.xcrs)) {
3963 r = PTR_ERR(u.xcrs);
3964 goto out_nofree;
3965 }
3966
3967 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3968 break;
3969 }
3970 case KVM_SET_TSC_KHZ: {
3971 u32 user_tsc_khz;
3972
3973 r = -EINVAL;
3974 user_tsc_khz = (u32)arg;
3975
		if (kvm_has_tsc_control &&
		    user_tsc_khz >= kvm_max_guest_tsc_khz)
3977 goto out;
3978
3979 if (user_tsc_khz == 0)
3980 user_tsc_khz = tsc_khz;
3981
3982 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
3983 r = 0;
3984
3985 goto out;
3986 }
3987 case KVM_GET_TSC_KHZ: {
3988 r = vcpu->arch.virtual_tsc_khz;
3989 goto out;
3990 }
3991 case KVM_KVMCLOCK_CTRL: {
3992 r = kvm_set_guest_paused(vcpu);
3993 goto out;
3994 }
3995 case KVM_ENABLE_CAP: {
3996 struct kvm_enable_cap cap;
3997
3998 r = -EFAULT;
3999 if (copy_from_user(&cap, argp, sizeof(cap)))
4000 goto out;
4001 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4002 break;
4003 }
4004 case KVM_GET_NESTED_STATE: {
4005 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4006 u32 user_data_size;
4007
4008 r = -EINVAL;
4009 if (!kvm_x86_ops->get_nested_state)
4010 break;
4011
4012 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
4013 r = -EFAULT;
4014 if (get_user(user_data_size, &user_kvm_nested_state->size))
4015 break;
4016
4017 r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
4018 user_data_size);
4019 if (r < 0)
4020 break;
4021
4022 if (r > user_data_size) {
4023 if (put_user(r, &user_kvm_nested_state->size))
4024 r = -EFAULT;
4025 else
4026 r = -E2BIG;
4027 break;
4028 }
4029
4030 r = 0;
4031 break;
4032 }
4033 case KVM_SET_NESTED_STATE: {
4034 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4035 struct kvm_nested_state kvm_state;
4036
4037 r = -EINVAL;
4038 if (!kvm_x86_ops->set_nested_state)
4039 break;
4040
4041 r = -EFAULT;
4042 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
4043 break;
4044
4045 r = -EINVAL;
4046 if (kvm_state.size < sizeof(kvm_state))
4047 break;
4048
4049 if (kvm_state.flags &
4050 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
4051 break;
4052
4053
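		/* nested_run_pending on its own is invalid: it implies guest mode. */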
4054 if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
4055 break;
4056
4057 r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
4058 break;
4059 }
4060 default:
4061 r = -EINVAL;
4062 }
4063out:
4064 kfree(u.buffer);
4065out_nofree:
4066 vcpu_put(vcpu);
4067 return r;
4068}
4069
4070vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4071{
4072 return VM_FAULT_SIGBUS;
4073}
4074
4075static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
4076{
4077 int ret;
4078
4079 if (addr > (unsigned int)(-3 * PAGE_SIZE))
4080 return -EINVAL;
4081 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
4082 return ret;
4083}
4084
4085static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
4086 u64 ident_addr)
4087{
4088 return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
4089}
4090
4091static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
4092 u32 kvm_nr_mmu_pages)
4093{
4094 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
4095 return -EINVAL;
4096
4097 mutex_lock(&kvm->slots_lock);
4098
4099 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
4100 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
4101
4102 mutex_unlock(&kvm->slots_lock);
4103 return 0;
4104}
4105
4106static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
4107{
4108 return kvm->arch.n_max_mmu_pages;
4109}
4110
4111static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4112{
4113 struct kvm_pic *pic = kvm->arch.vpic;
4114 int r;
4115
4116 r = 0;
4117 switch (chip->chip_id) {
4118 case KVM_IRQCHIP_PIC_MASTER:
4119 memcpy(&chip->chip.pic, &pic->pics[0],
4120 sizeof(struct kvm_pic_state));
4121 break;
4122 case KVM_IRQCHIP_PIC_SLAVE:
4123 memcpy(&chip->chip.pic, &pic->pics[1],
4124 sizeof(struct kvm_pic_state));
4125 break;
4126 case KVM_IRQCHIP_IOAPIC:
4127 kvm_get_ioapic(kvm, &chip->chip.ioapic);
4128 break;
4129 default:
4130 r = -EINVAL;
4131 break;
4132 }
4133 return r;
4134}
4135
4136static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4137{
4138 struct kvm_pic *pic = kvm->arch.vpic;
4139 int r;
4140
4141 r = 0;
4142 switch (chip->chip_id) {
4143 case KVM_IRQCHIP_PIC_MASTER:
4144 spin_lock(&pic->lock);
4145 memcpy(&pic->pics[0], &chip->chip.pic,
4146 sizeof(struct kvm_pic_state));
4147 spin_unlock(&pic->lock);
4148 break;
4149 case KVM_IRQCHIP_PIC_SLAVE:
4150 spin_lock(&pic->lock);
4151 memcpy(&pic->pics[1], &chip->chip.pic,
4152 sizeof(struct kvm_pic_state));
4153 spin_unlock(&pic->lock);
4154 break;
4155 case KVM_IRQCHIP_IOAPIC:
4156 kvm_set_ioapic(kvm, &chip->chip.ioapic);
4157 break;
4158 default:
4159 r = -EINVAL;
4160 break;
4161 }
4162 kvm_pic_update_irq(pic);
4163 return r;
4164}
4165
4166static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4167{
4168 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
4169
4170 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
4171
4172 mutex_lock(&kps->lock);
4173 memcpy(ps, &kps->channels, sizeof(*ps));
4174 mutex_unlock(&kps->lock);
4175 return 0;
4176}
4177
4178static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4179{
4180 int i;
4181 struct kvm_pit *pit = kvm->arch.vpit;
4182
4183 mutex_lock(&pit->pit_state.lock);
4184 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
4185 for (i = 0; i < 3; i++)
4186 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
4187 mutex_unlock(&pit->pit_state.lock);
4188 return 0;
4189}
4190
4191static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4192{
4193 mutex_lock(&kvm->arch.vpit->pit_state.lock);
4194 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
4195 sizeof(ps->channels));
4196 ps->flags = kvm->arch.vpit->pit_state.flags;
4197 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
4198 memset(&ps->reserved, 0, sizeof(ps->reserved));
4199 return 0;
4200}
4201
4202static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4203{
4204 int start = 0;
4205 int i;
4206 u32 prev_legacy, cur_legacy;
4207 struct kvm_pit *pit = kvm->arch.vpit;
4208
4209 mutex_lock(&pit->pit_state.lock);
4210 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
4211 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
4212 if (!prev_legacy && cur_legacy)
4213 start = 1;
4214 memcpy(&pit->pit_state.channels, &ps->channels,
4215 sizeof(pit->pit_state.channels));
4216 pit->pit_state.flags = ps->flags;
4217 for (i = 0; i < 3; i++)
4218 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
4219 start && i == 0);
4220 mutex_unlock(&pit->pit_state.lock);
4221 return 0;
4222}
4223
4224static int kvm_vm_ioctl_reinject(struct kvm *kvm,
4225 struct kvm_reinject_control *control)
4226{
4227 struct kvm_pit *pit = kvm->arch.vpit;
4228
4229 if (!pit)
4230 return -ENXIO;
4231
4232
4233
4234
4235
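	/*
	 * pit_state.lock is reused here so that concurrent
	 * KVM_REINJECT_CONTROL calls cannot leave the PIT reinject state
	 * inconsistent.
	 */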
4236 mutex_lock(&pit->pit_state.lock);
4237 kvm_pit_set_reinject(pit, control->pit_reinject);
4238 mutex_unlock(&pit->pit_state.lock);
4239
4240 return 0;
4241}
4242
/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * The dirty page logging flow is:
 *
 *   1. Take a snapshot of the dirty bits and clear them.
 *   2. Write protect the corresponding pages.
 *   3. Copy the snapshot to userspace.
 *   4. Flush the TLBs if any page was write protected.
 *
 * kvm_get_dirty_log_protect() handles steps 1-3; the TLB flush below covers
 * step 4, so that writes which happen after the log read are guaranteed to
 * show up as dirty in the next read.
 */
4262int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
4263{
4264 bool is_dirty = false;
4265 int r;
4266
4267 mutex_lock(&kvm->slots_lock);
4268
4269
4270
4271
4272 if (kvm_x86_ops->flush_log_dirty)
4273 kvm_x86_ops->flush_log_dirty(kvm);
4274
4275 r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
4276
4277
4278
4279
4280
4281 lockdep_assert_held(&kvm->slots_lock);
4282 if (is_dirty)
4283 kvm_flush_remote_tlbs(kvm);
4284
4285 mutex_unlock(&kvm->slots_lock);
4286 return r;
4287}
4288
4289int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
4290 bool line_status)
4291{
4292 if (!irqchip_in_kernel(kvm))
4293 return -ENXIO;
4294
4295 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
4296 irq_event->irq, irq_event->level,
4297 line_status);
4298 return 0;
4299}
4300
4301static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
4302 struct kvm_enable_cap *cap)
4303{
4304 int r;
4305
4306 if (cap->flags)
4307 return -EINVAL;
4308
4309 switch (cap->cap) {
4310 case KVM_CAP_DISABLE_QUIRKS:
4311 kvm->arch.disabled_quirks = cap->args[0];
4312 r = 0;
4313 break;
4314 case KVM_CAP_SPLIT_IRQCHIP: {
4315 mutex_lock(&kvm->lock);
4316 r = -EINVAL;
4317 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
4318 goto split_irqchip_unlock;
4319 r = -EEXIST;
4320 if (irqchip_in_kernel(kvm))
4321 goto split_irqchip_unlock;
4322 if (kvm->created_vcpus)
4323 goto split_irqchip_unlock;
4324 r = kvm_setup_empty_irq_routing(kvm);
4325 if (r)
4326 goto split_irqchip_unlock;
4327
4328 smp_wmb();
4329 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
4330 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
4331 r = 0;
4332split_irqchip_unlock:
4333 mutex_unlock(&kvm->lock);
4334 break;
4335 }
4336 case KVM_CAP_X2APIC_API:
4337 r = -EINVAL;
4338 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
4339 break;
4340
4341 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
4342 kvm->arch.x2apic_format = true;
4343 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
4344 kvm->arch.x2apic_broadcast_quirk_disabled = true;
4345
4346 r = 0;
4347 break;
4348 case KVM_CAP_X86_DISABLE_EXITS:
4349 r = -EINVAL;
4350 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
4351 break;
4352
4353 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
4354 kvm_can_mwait_in_guest())
4355 kvm->arch.mwait_in_guest = true;
4356 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
4357 kvm->arch.hlt_in_guest = true;
4358 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
4359 kvm->arch.pause_in_guest = true;
4360 r = 0;
4361 break;
4362 case KVM_CAP_MSR_PLATFORM_INFO:
4363 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
4364 r = 0;
4365 break;
4366 default:
4367 r = -EINVAL;
4368 break;
4369 }
4370 return r;
4371}
4372
4373long kvm_arch_vm_ioctl(struct file *filp,
4374 unsigned int ioctl, unsigned long arg)
4375{
4376 struct kvm *kvm = filp->private_data;
4377 void __user *argp = (void __user *)arg;
4378 int r = -ENOTTY;
4379
4380
4381
4382
4383
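	/*
	 * The union below makes it explicit to the compiler that the stack
	 * usage of these PIT structures may be shared rather than added up.
	 */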
4384 union {
4385 struct kvm_pit_state ps;
4386 struct kvm_pit_state2 ps2;
4387 struct kvm_pit_config pit_config;
4388 } u;
4389
4390 switch (ioctl) {
4391 case KVM_SET_TSS_ADDR:
4392 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
4393 break;
4394 case KVM_SET_IDENTITY_MAP_ADDR: {
4395 u64 ident_addr;
4396
4397 mutex_lock(&kvm->lock);
4398 r = -EINVAL;
4399 if (kvm->created_vcpus)
4400 goto set_identity_unlock;
4401 r = -EFAULT;
4402 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
4403 goto set_identity_unlock;
4404 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
4405set_identity_unlock:
4406 mutex_unlock(&kvm->lock);
4407 break;
4408 }
4409 case KVM_SET_NR_MMU_PAGES:
4410 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
4411 break;
4412 case KVM_GET_NR_MMU_PAGES:
4413 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
4414 break;
4415 case KVM_CREATE_IRQCHIP: {
4416 mutex_lock(&kvm->lock);
4417
4418 r = -EEXIST;
4419 if (irqchip_in_kernel(kvm))
4420 goto create_irqchip_unlock;
4421
4422 r = -EINVAL;
4423 if (kvm->created_vcpus)
4424 goto create_irqchip_unlock;
4425
4426 r = kvm_pic_init(kvm);
4427 if (r)
4428 goto create_irqchip_unlock;
4429
4430 r = kvm_ioapic_init(kvm);
4431 if (r) {
4432 kvm_pic_destroy(kvm);
4433 goto create_irqchip_unlock;
4434 }
4435
4436 r = kvm_setup_default_irq_routing(kvm);
4437 if (r) {
4438 kvm_ioapic_destroy(kvm);
4439 kvm_pic_destroy(kvm);
4440 goto create_irqchip_unlock;
4441 }
4442
4443 smp_wmb();
4444 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
4445 create_irqchip_unlock:
4446 mutex_unlock(&kvm->lock);
4447 break;
4448 }
4449 case KVM_CREATE_PIT:
4450 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
4451 goto create_pit;
4452 case KVM_CREATE_PIT2:
4453 r = -EFAULT;
4454 if (copy_from_user(&u.pit_config, argp,
4455 sizeof(struct kvm_pit_config)))
4456 goto out;
4457 create_pit:
4458 mutex_lock(&kvm->lock);
4459 r = -EEXIST;
4460 if (kvm->arch.vpit)
4461 goto create_pit_unlock;
4462 r = -ENOMEM;
4463 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
4464 if (kvm->arch.vpit)
4465 r = 0;
4466 create_pit_unlock:
4467 mutex_unlock(&kvm->lock);
4468 break;
4469 case KVM_GET_IRQCHIP: {
4470
4471 struct kvm_irqchip *chip;
4472
4473 chip = memdup_user(argp, sizeof(*chip));
4474 if (IS_ERR(chip)) {
4475 r = PTR_ERR(chip);
4476 goto out;
4477 }
4478
4479 r = -ENXIO;
4480 if (!irqchip_kernel(kvm))
4481 goto get_irqchip_out;
4482 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
4483 if (r)
4484 goto get_irqchip_out;
4485 r = -EFAULT;
4486 if (copy_to_user(argp, chip, sizeof *chip))
4487 goto get_irqchip_out;
4488 r = 0;
4489 get_irqchip_out:
4490 kfree(chip);
4491 break;
4492 }
4493 case KVM_SET_IRQCHIP: {
4494
4495 struct kvm_irqchip *chip;
4496
4497 chip = memdup_user(argp, sizeof(*chip));
4498 if (IS_ERR(chip)) {
4499 r = PTR_ERR(chip);
4500 goto out;
4501 }
4502
4503 r = -ENXIO;
4504 if (!irqchip_kernel(kvm))
4505 goto set_irqchip_out;
4506 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
4507 if (r)
4508 goto set_irqchip_out;
4509 r = 0;
4510 set_irqchip_out:
4511 kfree(chip);
4512 break;
4513 }
4514 case KVM_GET_PIT: {
4515 r = -EFAULT;
4516 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
4517 goto out;
4518 r = -ENXIO;
4519 if (!kvm->arch.vpit)
4520 goto out;
4521 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
4522 if (r)
4523 goto out;
4524 r = -EFAULT;
4525 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
4526 goto out;
4527 r = 0;
4528 break;
4529 }
4530 case KVM_SET_PIT: {
4531 r = -EFAULT;
4532 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
4533 goto out;
4534 r = -ENXIO;
4535 if (!kvm->arch.vpit)
4536 goto out;
4537 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
4538 break;
4539 }
4540 case KVM_GET_PIT2: {
4541 r = -ENXIO;
4542 if (!kvm->arch.vpit)
4543 goto out;
4544 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
4545 if (r)
4546 goto out;
4547 r = -EFAULT;
4548 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
4549 goto out;
4550 r = 0;
4551 break;
4552 }
4553 case KVM_SET_PIT2: {
4554 r = -EFAULT;
4555 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
4556 goto out;
4557 r = -ENXIO;
4558 if (!kvm->arch.vpit)
4559 goto out;
4560 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
4561 break;
4562 }
4563 case KVM_REINJECT_CONTROL: {
4564 struct kvm_reinject_control control;
4565 r = -EFAULT;
4566 if (copy_from_user(&control, argp, sizeof(control)))
4567 goto out;
4568 r = kvm_vm_ioctl_reinject(kvm, &control);
4569 break;
4570 }
4571 case KVM_SET_BOOT_CPU_ID:
4572 r = 0;
4573 mutex_lock(&kvm->lock);
4574 if (kvm->created_vcpus)
4575 r = -EBUSY;
4576 else
4577 kvm->arch.bsp_vcpu_id = arg;
4578 mutex_unlock(&kvm->lock);
4579 break;
4580 case KVM_XEN_HVM_CONFIG: {
4581 struct kvm_xen_hvm_config xhc;
4582 r = -EFAULT;
4583 if (copy_from_user(&xhc, argp, sizeof(xhc)))
4584 goto out;
4585 r = -EINVAL;
4586 if (xhc.flags)
4587 goto out;
4588 memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
4589 r = 0;
4590 break;
4591 }
4592 case KVM_SET_CLOCK: {
4593 struct kvm_clock_data user_ns;
4594 u64 now_ns;
4595
4596 r = -EFAULT;
4597 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
4598 goto out;
4599
4600 r = -EINVAL;
4601 if (user_ns.flags)
4602 goto out;
4603
4604 r = 0;
4605
4606
4607
4608
4609
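		/*
		 * Recompute the master clock, then bias the per-VM kvmclock
		 * offset so that get_kvmclock_ns() matches the value
		 * userspace supplied, and have every vCPU refresh its
		 * pvclock area.
		 */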
4610 kvm_gen_update_masterclock(kvm);
4611 now_ns = get_kvmclock_ns(kvm);
4612 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
4613 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
4614 break;
4615 }
4616 case KVM_GET_CLOCK: {
4617 struct kvm_clock_data user_ns;
4618 u64 now_ns;
4619
4620 now_ns = get_kvmclock_ns(kvm);
4621 user_ns.clock = now_ns;
4622 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
4623 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
4624
4625 r = -EFAULT;
4626 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
4627 goto out;
4628 r = 0;
4629 break;
4630 }
4631 case KVM_ENABLE_CAP: {
4632 struct kvm_enable_cap cap;
4633
4634 r = -EFAULT;
4635 if (copy_from_user(&cap, argp, sizeof(cap)))
4636 goto out;
4637 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
4638 break;
4639 }
4640 case KVM_MEMORY_ENCRYPT_OP: {
4641 r = -ENOTTY;
4642 if (kvm_x86_ops->mem_enc_op)
4643 r = kvm_x86_ops->mem_enc_op(kvm, argp);
4644 break;
4645 }
4646 case KVM_MEMORY_ENCRYPT_REG_REGION: {
4647 struct kvm_enc_region region;
4648
4649 r = -EFAULT;
4650 if (copy_from_user(&region, argp, sizeof(region)))
4651 goto out;
4652
4653 r = -ENOTTY;
4654 if (kvm_x86_ops->mem_enc_reg_region)
4655 r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
4656 break;
4657 }
4658 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
4659 struct kvm_enc_region region;
4660
4661 r = -EFAULT;
4662 if (copy_from_user(&region, argp, sizeof(region)))
4663 goto out;
4664
4665 r = -ENOTTY;
4666 if (kvm_x86_ops->mem_enc_unreg_region)
4667 r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
4668 break;
4669 }
4670 case KVM_HYPERV_EVENTFD: {
4671 struct kvm_hyperv_eventfd hvevfd;
4672
4673 r = -EFAULT;
4674 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
4675 goto out;
4676 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
4677 break;
4678 }
4679 default:
4680 r = -ENOTTY;
4681 }
4682out:
4683 return r;
4684}
4685
4686static void kvm_init_msr_list(void)
4687{
4688 u32 dummy[2];
4689 unsigned i, j;
4690
4691 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
4692 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
4693 continue;
4694
4695
4696
4697
4698
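		/*
		 * Even MSRs that are valid in the host may not be exposed to
		 * guests when the corresponding feature is unsupported.
		 */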
4699 switch (msrs_to_save[i]) {
4700 case MSR_IA32_BNDCFGS:
4701 if (!kvm_mpx_supported())
4702 continue;
4703 break;
4704 case MSR_TSC_AUX:
4705 if (!kvm_x86_ops->rdtscp_supported())
4706 continue;
4707 break;
4708 default:
4709 break;
4710 }
4711
4712 if (j < i)
4713 msrs_to_save[j] = msrs_to_save[i];
4714 j++;
4715 }
4716 num_msrs_to_save = j;
4717
4718 for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
4719 if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
4720 continue;
4721
4722 if (j < i)
4723 emulated_msrs[j] = emulated_msrs[i];
4724 j++;
4725 }
4726 num_emulated_msrs = j;
4727
4728 for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
4729 struct kvm_msr_entry msr;
4730
4731 msr.index = msr_based_features[i];
4732 if (kvm_get_msr_feature(&msr))
4733 continue;
4734
4735 if (j < i)
4736 msr_based_features[j] = msr_based_features[i];
4737 j++;
4738 }
4739 num_msr_based_features = j;
4740}
4741
4742static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
4743 const void *v)
4744{
4745 int handled = 0;
4746 int n;
4747
4748 do {
4749 n = min(len, 8);
4750 if (!(lapic_in_kernel(vcpu) &&
4751 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
4752 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
4753 break;
4754 handled += n;
4755 addr += n;
4756 len -= n;
4757 v += n;
4758 } while (len);
4759
4760 return handled;
4761}
4762
4763static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4764{
4765 int handled = 0;
4766 int n;
4767
4768 do {
4769 n = min(len, 8);
4770 if (!(lapic_in_kernel(vcpu) &&
4771 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
4772 addr, n, v))
4773 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4774 break;
4775 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
4776 handled += n;
4777 addr += n;
4778 len -= n;
4779 v += n;
4780 } while (len);
4781
4782 return handled;
4783}
4784
4785static void kvm_set_segment(struct kvm_vcpu *vcpu,
4786 struct kvm_segment *var, int seg)
4787{
4788 kvm_x86_ops->set_segment(vcpu, var, seg);
4789}
4790
4791void kvm_get_segment(struct kvm_vcpu *vcpu,
4792 struct kvm_segment *var, int seg)
4793{
4794 kvm_x86_ops->get_segment(vcpu, var, seg);
4795}
4796
4797gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
4798 struct x86_exception *exception)
4799{
4800 gpa_t t_gpa;
4801
4802 BUG_ON(!mmu_is_nested(vcpu));
4803
4804
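	/* Nested translations are always performed as user-mode accesses. */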
4805 access |= PFERR_USER_MASK;
4806 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
4807
4808 return t_gpa;
4809}
4810
4811gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
4812 struct x86_exception *exception)
4813{
4814 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4815 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4816}
4817
4818gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
4819 struct x86_exception *exception)
4820{
4821 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4822 access |= PFERR_FETCH_MASK;
4823 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4824}
4825
4826gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
4827 struct x86_exception *exception)
4828{
4829 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4830 access |= PFERR_WRITE_MASK;
4831 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4832}
4833
4834
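/* System accesses: walk without PFERR_USER_MASK, i.e. ignore the current CPL. */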
4835gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
4836 struct x86_exception *exception)
4837{
4838 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
4839}
4840
4841static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4842 struct kvm_vcpu *vcpu, u32 access,
4843 struct x86_exception *exception)
4844{
4845 void *data = val;
4846 int r = X86EMUL_CONTINUE;
4847
4848 while (bytes) {
4849 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
4850 exception);
4851 unsigned offset = addr & (PAGE_SIZE-1);
4852 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
4853 int ret;
4854
4855 if (gpa == UNMAPPED_GVA)
4856 return X86EMUL_PROPAGATE_FAULT;
4857 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
4858 offset, toread);
4859 if (ret < 0) {
4860 r = X86EMUL_IO_NEEDED;
4861 goto out;
4862 }
4863
4864 bytes -= toread;
4865 data += toread;
4866 addr += toread;
4867 }
4868out:
4869 return r;
4870}
4871
4872
4873static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4874 gva_t addr, void *val, unsigned int bytes,
4875 struct x86_exception *exception)
4876{
4877 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4878 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4879 unsigned offset;
4880 int ret;
4881
4882
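	/*
	 * Open-coded, single-page variant of kvm_read_guest_virt_helper()
	 * for instruction fetches; fetches are clamped so they never cross
	 * a page boundary here.
	 */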
4883 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
4884 exception);
4885 if (unlikely(gpa == UNMAPPED_GVA))
4886 return X86EMUL_PROPAGATE_FAULT;
4887
4888 offset = addr & (PAGE_SIZE-1);
4889 if (WARN_ON(offset + bytes > PAGE_SIZE))
4890 bytes = (unsigned)PAGE_SIZE - offset;
4891 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
4892 offset, bytes);
4893 if (unlikely(ret < 0))
4894 return X86EMUL_IO_NEEDED;
4895
4896 return X86EMUL_CONTINUE;
4897}
4898
4899int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
4900 gva_t addr, void *val, unsigned int bytes,
4901 struct x86_exception *exception)
4902{
4903 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4904
4905 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4906 exception);
4907}
4908EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4909
4910static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
4911 gva_t addr, void *val, unsigned int bytes,
4912 struct x86_exception *exception, bool system)
4913{
4914 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4915 u32 access = 0;
4916
4917 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
4918 access |= PFERR_USER_MASK;
4919
4920 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
4921}
4922
4923static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
4924 unsigned long addr, void *val, unsigned int bytes)
4925{
4926 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4927 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
4928
4929 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
4930}
4931
4932static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4933 struct kvm_vcpu *vcpu, u32 access,
4934 struct x86_exception *exception)
4935{
4936 void *data = val;
4937 int r = X86EMUL_CONTINUE;
4938
4939 while (bytes) {
4940 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4941 access,
4942 exception);
4943 unsigned offset = addr & (PAGE_SIZE-1);
4944 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4945 int ret;
4946
4947 if (gpa == UNMAPPED_GVA)
4948 return X86EMUL_PROPAGATE_FAULT;
4949 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
4950 if (ret < 0) {
4951 r = X86EMUL_IO_NEEDED;
4952 goto out;
4953 }
4954
4955 bytes -= towrite;
4956 data += towrite;
4957 addr += towrite;
4958 }
4959out:
4960 return r;
4961}
4962
4963static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
4964 unsigned int bytes, struct x86_exception *exception,
4965 bool system)
4966{
4967 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4968 u32 access = PFERR_WRITE_MASK;
4969
4970 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
4971 access |= PFERR_USER_MASK;
4972
4973 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
4974 access, exception);
4975}
4976
4977int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
4978 unsigned int bytes, struct x86_exception *exception)
4979{
4980
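	/*
	 * System writes can touch a large amount of guest memory; request an
	 * L1D cache flush before the next VM entry (L1TF mitigation).
	 */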
4981 vcpu->arch.l1tf_flush_l1d = true;
4982
4983 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
4984 PFERR_WRITE_MASK, exception);
4985}
4986EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4987
4988int handle_ud(struct kvm_vcpu *vcpu)
4989{
4990 int emul_type = EMULTYPE_TRAP_UD;
4991 enum emulation_result er;
4992 char sig[5];
4993 struct x86_exception e;
4994
4995 if (force_emulation_prefix &&
4996 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
4997 sig, sizeof(sig), &e) == 0 &&
4998 memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
4999 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
5000 emul_type = 0;
5001 }
5002
5003 er = kvm_emulate_instruction(vcpu, emul_type);
5004 if (er == EMULATE_USER_EXIT)
5005 return 0;
5006 if (er != EMULATE_DONE)
5007 kvm_queue_exception(vcpu, UD_VECTOR);
5008 return 1;
5009}
5010EXPORT_SYMBOL_GPL(handle_ud);
5011
5012static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5013 gpa_t gpa, bool write)
5014{
5015
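	/* The default APIC MMIO page is always treated as MMIO. */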
5016 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5017 return 1;
5018
5019 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
5020 trace_vcpu_match_mmio(gva, gpa, write, true);
5021 return 1;
5022 }
5023
5024 return 0;
5025}
5026
5027static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5028 gpa_t *gpa, struct x86_exception *exception,
5029 bool write)
5030{
5031 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
5032 | (write ? PFERR_WRITE_MASK : 0);
5033
5034
5035
5036
5037
5038
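	/*
	 * Fast path: if the gva matches the cached MMIO gva and the cached
	 * access bits still pass the permission check, reuse the cached gfn
	 * instead of walking the guest page tables.
	 */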
5039 if (vcpu_match_mmio_gva(vcpu, gva)
5040 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
5041 vcpu->arch.access, 0, access)) {
5042 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
5043 (gva & (PAGE_SIZE - 1));
5044 trace_vcpu_match_mmio(gva, *gpa, write, false);
5045 return 1;
5046 }
5047
5048 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5049
5050 if (*gpa == UNMAPPED_GVA)
5051 return -1;
5052
5053 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
5054}
5055
5056int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
5057 const void *val, int bytes)
5058{
5059 int ret;
5060
5061 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
5062 if (ret < 0)
5063 return 0;
5064 kvm_page_track_write(vcpu, gpa, val, bytes);
5065 return 1;
5066}
5067
5068struct read_write_emulator_ops {
5069 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
5070 int bytes);
5071 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
5072 void *val, int bytes);
5073 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5074 int bytes, void *val);
5075 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5076 void *val, int bytes);
5077 bool write;
5078};
5079
5080static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
5081{
5082 if (vcpu->mmio_read_completed) {
5083 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
5084 vcpu->mmio_fragments[0].gpa, val);
5085 vcpu->mmio_read_completed = 0;
5086 return 1;
5087 }
5088
5089 return 0;
5090}
5091
5092static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5093 void *val, int bytes)
5094{
5095 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
5096}
5097
5098static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5099 void *val, int bytes)
5100{
5101 return emulator_write_phys(vcpu, gpa, val, bytes);
5102}
5103
5104static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
5105{
5106 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
5107 return vcpu_mmio_write(vcpu, gpa, bytes, val);
5108}
5109
5110static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5111 void *val, int bytes)
5112{
5113 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
5114 return X86EMUL_IO_NEEDED;
5115}
5116
5117static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5118 void *val, int bytes)
5119{
5120 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
5121
5122 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
5123 return X86EMUL_CONTINUE;
5124}
5125
5126static const struct read_write_emulator_ops read_emultor = {
5127 .read_write_prepare = read_prepare,
5128 .read_write_emulate = read_emulate,
5129 .read_write_mmio = vcpu_mmio_read,
5130 .read_write_exit_mmio = read_exit_mmio,
5131};
5132
5133static const struct read_write_emulator_ops write_emultor = {
5134 .read_write_emulate = write_emulate,
5135 .read_write_mmio = write_mmio,
5136 .read_write_exit_mmio = write_exit_mmio,
5137 .write = true,
5138};
5139
5140static int emulator_read_write_onepage(unsigned long addr, void *val,
5141 unsigned int bytes,
5142 struct x86_exception *exception,
5143 struct kvm_vcpu *vcpu,
5144 const struct read_write_emulator_ops *ops)
5145{
5146 gpa_t gpa;
5147 int handled, ret;
5148 bool write = ops->write;
5149 struct kvm_mmio_fragment *frag;
5150 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5151
5152
5153
5154
5155
5156
5157
5158
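	/*
	 * If hardware already told us the faulting gpa for this access
	 * (gpa_available), the emulator may rely on it and the page offset
	 * matches, skip the gva->gpa walk entirely.
	 */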
5159 if (vcpu->arch.gpa_available &&
5160 emulator_can_use_gpa(ctxt) &&
5161 (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
5162 gpa = vcpu->arch.gpa_val;
5163 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
5164 } else {
5165 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
5166 if (ret < 0)
5167 return X86EMUL_PROPAGATE_FAULT;
5168 }
5169
5170 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
5171 return X86EMUL_CONTINUE;
5172
5173
5174
5175
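	/* Not ordinary guest RAM: hand the access to the in-kernel MMIO devices. */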
5176 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
5177 if (handled == bytes)
5178 return X86EMUL_CONTINUE;
5179
5180 gpa += handled;
5181 bytes -= handled;
5182 val += handled;
5183
5184 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
5185 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
5186 frag->gpa = gpa;
5187 frag->data = val;
5188 frag->len = bytes;
5189 return X86EMUL_CONTINUE;
5190}
5191
5192static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
5193 unsigned long addr,
5194 void *val, unsigned int bytes,
5195 struct x86_exception *exception,
5196 const struct read_write_emulator_ops *ops)
5197{
5198 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5199 gpa_t gpa;
5200 int rc;
5201
5202 if (ops->read_write_prepare &&
5203 ops->read_write_prepare(vcpu, val, bytes))
5204 return X86EMUL_CONTINUE;
5205
5206 vcpu->mmio_nr_fragments = 0;
5207
5208
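	/* Accesses that cross a page boundary are split; emulate the first page here. */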
5209 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
5210 int now;
5211
5212 now = -addr & ~PAGE_MASK;
5213 rc = emulator_read_write_onepage(addr, val, now, exception,
5214 vcpu, ops);
5215
5216 if (rc != X86EMUL_CONTINUE)
5217 return rc;
5218 addr += now;
5219 if (ctxt->mode != X86EMUL_MODE_PROT64)
5220 addr = (u32)addr;
5221 val += now;
5222 bytes -= now;
5223 }
5224
5225 rc = emulator_read_write_onepage(addr, val, bytes, exception,
5226 vcpu, ops);
5227 if (rc != X86EMUL_CONTINUE)
5228 return rc;
5229
5230 if (!vcpu->mmio_nr_fragments)
5231 return rc;
5232
5233 gpa = vcpu->mmio_fragments[0].gpa;
5234
5235 vcpu->mmio_needed = 1;
5236 vcpu->mmio_cur_fragment = 0;
5237
5238 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
5239 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
5240 vcpu->run->exit_reason = KVM_EXIT_MMIO;
5241 vcpu->run->mmio.phys_addr = gpa;
5242
5243 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
5244}
5245
5246static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
5247 unsigned long addr,
5248 void *val,
5249 unsigned int bytes,
5250 struct x86_exception *exception)
5251{
5252 return emulator_read_write(ctxt, addr, val, bytes,
5253 exception, &read_emultor);
5254}
5255
5256static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
5257 unsigned long addr,
5258 const void *val,
5259 unsigned int bytes,
5260 struct x86_exception *exception)
5261{
5262 return emulator_read_write(ctxt, addr, (void *)val, bytes,
5263 exception, &write_emultor);
5264}
5265
5266#define CMPXCHG_TYPE(t, ptr, old, new) \
5267 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
5268
5269#ifdef CONFIG_X86_64
5270# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
5271#else
5272# define CMPXCHG64(ptr, old, new) \
5273 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
5274#endif
5275
5276static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
5277 unsigned long addr,
5278 const void *old,
5279 const void *new,
5280 unsigned int bytes,
5281 struct x86_exception *exception)
5282{
5283 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5284 gpa_t gpa;
5285 struct page *page;
5286 char *kaddr;
5287 bool exchanged;
5288
5289
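	/*
	 * Only power-of-two sizes up to 8 bytes, within a single page, can be
	 * exchanged atomically here; everything else falls back to a plain
	 * emulated write.
	 */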
5290 if (bytes > 8 || (bytes & (bytes - 1)))
5291 goto emul_write;
5292
5293 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
5294
5295 if (gpa == UNMAPPED_GVA ||
5296 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5297 goto emul_write;
5298
5299 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
5300 goto emul_write;
5301
5302 page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
5303 if (is_error_page(page))
5304 goto emul_write;
5305
5306 kaddr = kmap_atomic(page);
5307 kaddr += offset_in_page(gpa);
5308 switch (bytes) {
5309 case 1:
5310 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
5311 break;
5312 case 2:
5313 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
5314 break;
5315 case 4:
5316 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
5317 break;
5318 case 8:
5319 exchanged = CMPXCHG64(kaddr, old, new);
5320 break;
5321 default:
5322 BUG();
5323 }
5324 kunmap_atomic(kaddr);
5325 kvm_release_page_dirty(page);
5326
5327 if (!exchanged)
5328 return X86EMUL_CMPXCHG_FAILED;
5329
5330 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
5331 kvm_page_track_write(vcpu, gpa, new, bytes);
5332
5333 return X86EMUL_CONTINUE;
5334
5335emul_write:
5336 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
5337
5338 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
5339}
5340
5341static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
5342{
5343 int r = 0, i;
5344
5345 for (i = 0; i < vcpu->arch.pio.count; i++) {
5346 if (vcpu->arch.pio.in)
5347 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
5348 vcpu->arch.pio.size, pd);
5349 else
5350 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
5351 vcpu->arch.pio.port, vcpu->arch.pio.size,
5352 pd);
5353 if (r)
5354 break;
5355 pd += vcpu->arch.pio.size;
5356 }
5357 return r;
5358}
5359
5360static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
5361 unsigned short port, void *val,
5362 unsigned int count, bool in)
5363{
5364 vcpu->arch.pio.port = port;
5365 vcpu->arch.pio.in = in;
5366 vcpu->arch.pio.count = count;
5367 vcpu->arch.pio.size = size;
5368
5369 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
5370 vcpu->arch.pio.count = 0;
5371 return 1;
5372 }
5373
5374 vcpu->run->exit_reason = KVM_EXIT_IO;
5375 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
5376 vcpu->run->io.size = size;
5377 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
5378 vcpu->run->io.count = count;
5379 vcpu->run->io.port = port;
5380
5381 return 0;
5382}
5383
5384static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
5385 int size, unsigned short port, void *val,
5386 unsigned int count)
5387{
5388 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5389 int ret;
5390
5391 if (vcpu->arch.pio.count)
5392 goto data_avail;
5393
5394 memset(vcpu->arch.pio_data, 0, size * count);
5395
5396 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
5397 if (ret) {
5398data_avail:
5399 memcpy(val, vcpu->arch.pio_data, size * count);
5400 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
5401 vcpu->arch.pio.count = 0;
5402 return 1;
5403 }
5404
5405 return 0;
5406}
5407
5408static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
5409 int size, unsigned short port,
5410 const void *val, unsigned int count)
5411{
5412 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5413
5414 memcpy(vcpu->arch.pio_data, val, size * count);
5415 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
5416 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
5417}
5418
5419static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
5420{
5421 return kvm_x86_ops->get_segment_base(vcpu, seg);
5422}
5423
5424static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
5425{
5426 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
5427}
5428
5429static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
5430{
5431 if (!need_emulate_wbinvd(vcpu))
5432 return X86EMUL_CONTINUE;
5433
5434 if (kvm_x86_ops->has_wbinvd_exit()) {
5435 int cpu = get_cpu();
5436
5437 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
5438 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
5439 wbinvd_ipi, NULL, 1);
5440 put_cpu();
5441 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
5442 } else
5443 wbinvd();
5444 return X86EMUL_CONTINUE;
5445}
5446
5447int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
5448{
5449 kvm_emulate_wbinvd_noskip(vcpu);
5450 return kvm_skip_emulated_instruction(vcpu);
5451}
5452EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
5453
5454
5455
5456static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
5457{
5458 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
5459}
5460
5461static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
5462 unsigned long *dest)
5463{
5464 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
5465}
5466
5467static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
5468 unsigned long value)
5469{
5470
5471 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
5472}
5473
5474static u64 mk_cr_64(u64 curr_cr, u32 new_val)
5475{
5476 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
5477}
5478
5479static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
5480{
5481 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5482 unsigned long value;
5483
5484 switch (cr) {
5485 case 0:
5486 value = kvm_read_cr0(vcpu);
5487 break;
5488 case 2:
5489 value = vcpu->arch.cr2;
5490 break;
5491 case 3:
5492 value = kvm_read_cr3(vcpu);
5493 break;
5494 case 4:
5495 value = kvm_read_cr4(vcpu);
5496 break;
5497 case 8:
5498 value = kvm_get_cr8(vcpu);
5499 break;
5500 default:
5501 kvm_err("%s: unexpected cr %u\n", __func__, cr);
5502 return 0;
5503 }
5504
5505 return value;
5506}
5507
5508static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
5509{
5510 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5511 int res = 0;
5512
5513 switch (cr) {
5514 case 0:
5515 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
5516 break;
5517 case 2:
5518 vcpu->arch.cr2 = val;
5519 break;
5520 case 3:
5521 res = kvm_set_cr3(vcpu, val);
5522 break;
5523 case 4:
5524 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
5525 break;
5526 case 8:
5527 res = kvm_set_cr8(vcpu, val);
5528 break;
5529 default:
5530 kvm_err("%s: unexpected cr %u\n", __func__, cr);
5531 res = -1;
5532 }
5533
5534 return res;
5535}
5536
5537static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
5538{
5539 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
5540}
5541
5542static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5543{
5544 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
5545}
5546
5547static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5548{
5549 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
5550}
5551
5552static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5553{
5554 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
5555}
5556
5557static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5558{
5559 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
5560}
5561
5562static unsigned long emulator_get_cached_segment_base(
5563 struct x86_emulate_ctxt *ctxt, int seg)
5564{
5565 return get_segment_base(emul_to_vcpu(ctxt), seg);
5566}
5567
5568static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
5569 struct desc_struct *desc, u32 *base3,
5570 int seg)
5571{
5572 struct kvm_segment var;
5573
5574 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
5575 *selector = var.selector;
5576
5577 if (var.unusable) {
5578 memset(desc, 0, sizeof(*desc));
5579 if (base3)
5580 *base3 = 0;
5581 return false;
5582 }
5583
5584 if (var.g)
5585 var.limit >>= 12;
5586 set_desc_limit(desc, var.limit);
5587 set_desc_base(desc, (unsigned long)var.base);
5588#ifdef CONFIG_X86_64
5589 if (base3)
5590 *base3 = var.base >> 32;
5591#endif
5592 desc->type = var.type;
5593 desc->s = var.s;
5594 desc->dpl = var.dpl;
5595 desc->p = var.present;
5596 desc->avl = var.avl;
5597 desc->l = var.l;
5598 desc->d = var.db;
5599 desc->g = var.g;
5600
5601 return true;
5602}
5603
5604static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
5605 struct desc_struct *desc, u32 base3,
5606 int seg)
5607{
5608 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5609 struct kvm_segment var;
5610
5611 var.selector = selector;
5612 var.base = get_desc_base(desc);
5613#ifdef CONFIG_X86_64
5614 var.base |= ((u64)base3) << 32;
5615#endif
5616 var.limit = get_desc_limit(desc);
5617 if (desc->g)
5618 var.limit = (var.limit << 12) | 0xfff;
5619 var.type = desc->type;
5620 var.dpl = desc->dpl;
5621 var.db = desc->d;
5622 var.s = desc->s;
5623 var.l = desc->l;
5624 var.g = desc->g;
5625 var.avl = desc->avl;
5626 var.present = desc->p;
5627 var.unusable = !var.present;
5628 var.padding = 0;
5629
5630 kvm_set_segment(vcpu, &var, seg);
5631 return;
5632}
5633
5634static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
5635 u32 msr_index, u64 *pdata)
5636{
5637 struct msr_data msr;
5638 int r;
5639
5640 msr.index = msr_index;
5641 msr.host_initiated = false;
5642 r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
5643 if (r)
5644 return r;
5645
5646 *pdata = msr.data;
5647 return 0;
5648}
5649
5650static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
5651 u32 msr_index, u64 data)
5652{
5653 struct msr_data msr;
5654
5655 msr.data = data;
5656 msr.index = msr_index;
5657 msr.host_initiated = false;
5658 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
5659}
5660
5661static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
5662{
5663 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5664
5665 return vcpu->arch.smbase;
5666}
5667
5668static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
5669{
5670 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5671
5672 vcpu->arch.smbase = smbase;
5673}
5674
5675static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
5676 u32 pmc)
5677{
5678 return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
5679}
5680
5681static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
5682 u32 pmc, u64 *pdata)
5683{
5684 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
5685}
5686
5687static void emulator_halt(struct x86_emulate_ctxt *ctxt)
5688{
5689 emul_to_vcpu(ctxt)->arch.halt_request = 1;
5690}
5691
5692static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
5693 struct x86_instruction_info *info,
5694 enum x86_intercept_stage stage)
5695{
5696 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
5697}
5698
5699static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
5700 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
5701{
5702 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
5703}
5704
5705static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
5706{
5707 return kvm_register_read(emul_to_vcpu(ctxt), reg);
5708}
5709
5710static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
5711{
5712 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
5713}
5714
5715static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
5716{
5717 kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
5718}
5719
5720static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
5721{
5722 return emul_to_vcpu(ctxt)->arch.hflags;
5723}
5724
5725static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
5726{
5727 kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
5728}
5729
5730static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase)
5731{
5732 return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase);
5733}
5734
5735static const struct x86_emulate_ops emulate_ops = {
5736 .read_gpr = emulator_read_gpr,
5737 .write_gpr = emulator_write_gpr,
5738 .read_std = emulator_read_std,
5739 .write_std = emulator_write_std,
5740 .read_phys = kvm_read_guest_phys_system,
5741 .fetch = kvm_fetch_guest_virt,
5742 .read_emulated = emulator_read_emulated,
5743 .write_emulated = emulator_write_emulated,
5744 .cmpxchg_emulated = emulator_cmpxchg_emulated,
5745 .invlpg = emulator_invlpg,
5746 .pio_in_emulated = emulator_pio_in_emulated,
5747 .pio_out_emulated = emulator_pio_out_emulated,
5748 .get_segment = emulator_get_segment,
5749 .set_segment = emulator_set_segment,
5750 .get_cached_segment_base = emulator_get_cached_segment_base,
5751 .get_gdt = emulator_get_gdt,
5752 .get_idt = emulator_get_idt,
5753 .set_gdt = emulator_set_gdt,
5754 .set_idt = emulator_set_idt,
5755 .get_cr = emulator_get_cr,
5756 .set_cr = emulator_set_cr,
5757 .cpl = emulator_get_cpl,
5758 .get_dr = emulator_get_dr,
5759 .set_dr = emulator_set_dr,
5760 .get_smbase = emulator_get_smbase,
5761 .set_smbase = emulator_set_smbase,
5762 .set_msr = emulator_set_msr,
5763 .get_msr = emulator_get_msr,
5764 .check_pmc = emulator_check_pmc,
5765 .read_pmc = emulator_read_pmc,
5766 .halt = emulator_halt,
5767 .wbinvd = emulator_wbinvd,
5768 .fix_hypercall = emulator_fix_hypercall,
5769 .intercept = emulator_intercept,
5770 .get_cpuid = emulator_get_cpuid,
5771 .set_nmi_mask = emulator_set_nmi_mask,
5772 .get_hflags = emulator_get_hflags,
5773 .set_hflags = emulator_set_hflags,
5774 .pre_leave_smm = emulator_pre_leave_smm,
5775};
5776
5777static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
5778{
5779 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
5780
5781
5782
5783
5784
5785
5786
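	/*
	 * An interrupt shadow from STI/MOV SS only protects the next
	 * instruction.  If a shadow was already active when this instruction
	 * was emulated, drop it instead of re-arming it, and let pending
	 * events be re-evaluated.
	 */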
5787 if (int_shadow & mask)
5788 mask = 0;
5789 if (unlikely(int_shadow || mask)) {
5790 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
5791 if (!mask)
5792 kvm_make_request(KVM_REQ_EVENT, vcpu);
5793 }
5794}
5795
5796static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
5797{
5798 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5799 if (ctxt->exception.vector == PF_VECTOR)
5800 return kvm_propagate_fault(vcpu, &ctxt->exception);
5801
5802 if (ctxt->exception.error_code_valid)
5803 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
5804 ctxt->exception.error_code);
5805 else
5806 kvm_queue_exception(vcpu, ctxt->exception.vector);
5807 return false;
5808}
5809
5810static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
5811{
5812 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5813 int cs_db, cs_l;
5814
5815 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5816
5817 ctxt->eflags = kvm_get_rflags(vcpu);
5818 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
5819
5820 ctxt->eip = kvm_rip_read(vcpu);
5821 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
5822 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
5823 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
5824 cs_db ? X86EMUL_MODE_PROT32 :
5825 X86EMUL_MODE_PROT16;
5826 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
5827 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
5828 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
5829
5830 init_decode_cache(ctxt);
5831 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5832}
5833
5834int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
5835{
5836 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5837 int ret;
5838
5839 init_emulate_ctxt(vcpu);
5840
5841 ctxt->op_bytes = 2;
5842 ctxt->ad_bytes = 2;
5843 ctxt->_eip = ctxt->eip + inc_eip;
5844 ret = emulate_int_real(ctxt, irq);
5845
5846 if (ret != X86EMUL_CONTINUE)
5847 return EMULATE_FAIL;
5848
5849 ctxt->eip = ctxt->_eip;
5850 kvm_rip_write(vcpu, ctxt->eip);
5851 kvm_set_rflags(vcpu, ctxt->eflags);
5852
5853 return EMULATE_DONE;
5854}
5855EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
5856
5857static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
5858{
5859 int r = EMULATE_DONE;
5860
5861 ++vcpu->stat.insn_emulation_fail;
5862 trace_kvm_emulate_insn_failed(vcpu);
5863
5864 if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
5865 return EMULATE_FAIL;
5866
5867 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
5868 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5869 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
5870 vcpu->run->internal.ndata = 0;
5871 r = EMULATE_USER_EXIT;
5872 }
5873
5874 kvm_queue_exception(vcpu, UD_VECTOR);
5875
5876 return r;
5877}
5878
5879static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
5880 bool write_fault_to_shadow_pgtable,
5881 int emulation_type)
5882{
5883 gpa_t gpa = cr2;
5884 kvm_pfn_t pfn;
5885
5886 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
5887 return false;
5888
5889 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
5890 return false;
5891
5892 if (!vcpu->arch.mmu.direct_map) {
5893
5894
5895
5896
5897 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5898
5899
5900
5901
5902
5903 if (gpa == UNMAPPED_GVA)
5904 return true;
5905 }
5906
5907
5908
5909
5910
5911
5912
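	/*
	 * Map the faulting gfn; if there is no usable backing page the write
	 * can never be performed, so re-executing the instruction would just
	 * fault and fail again.
	 */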
5913 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
5914
5915
5916
5917
5918
5919 if (is_error_noslot_pfn(pfn))
5920 return false;
5921
5922 kvm_release_pfn_clean(pfn);
5923
5924
5925 if (vcpu->arch.mmu.direct_map) {
5926 unsigned int indirect_shadow_pages;
5927
5928 spin_lock(&vcpu->kvm->mmu_lock);
5929 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
5930 spin_unlock(&vcpu->kvm->mmu_lock);
5931
5932 if (indirect_shadow_pages)
5933 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5934
5935 return true;
5936 }
5937
5938
5939
5940
5941
5942
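	/*
	 * With shadow paging the most likely reason emulation was attempted
	 * is a write to a write-protected guest page table.  Unprotect the
	 * page and let the guest retry, unless the write faulted on its own
	 * page tables, which unprotecting cannot fix.
	 */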
5943 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5944
5945
5946
5947
5948
5949
5950 return !write_fault_to_shadow_pgtable;
5951}
5952
5953static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
5954 unsigned long cr2, int emulation_type)
5955{
5956 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5957 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
5958
5959 last_retry_eip = vcpu->arch.last_retry_eip;
5960 last_retry_addr = vcpu->arch.last_retry_addr;
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
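	/*
	 * Reset the retry state up front; an instruction is retried at most
	 * once.  A retry only helps when the write hit a write-protected
	 * (shadowed) guest page table, so bail out for nested guests, for
	 * instructions that themselves write page tables, or if this
	 * rip/address was already retried; otherwise unprotect the gfn and
	 * let the CPU re-execute the instruction.
	 */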
5975 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
5976
5977 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
5978 return false;
5979
5980 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
5981 return false;
5982
5983 if (x86_page_table_writing_insn(ctxt))
5984 return false;
5985
5986 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
5987 return false;
5988
5989 vcpu->arch.last_retry_eip = ctxt->eip;
5990 vcpu->arch.last_retry_addr = cr2;
5991
5992 if (!vcpu->arch.mmu.direct_map)
5993 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5994
5995 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5996
5997 return true;
5998}
5999
6000static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
6001static int complete_emulated_pio(struct kvm_vcpu *vcpu);
6002
6003static void kvm_smm_changed(struct kvm_vcpu *vcpu)
6004{
6005 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
6006
6007 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
6008
6009
6010 kvm_make_request(KVM_REQ_EVENT, vcpu);
6011 }
6012
6013 kvm_mmu_reset_context(vcpu);
6014}
6015
6016static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
6017{
6018 unsigned changed = vcpu->arch.hflags ^ emul_flags;
6019
6020 vcpu->arch.hflags = emul_flags;
6021
6022 if (changed & HF_SMM_MASK)
6023 kvm_smm_changed(vcpu);
6024}
6025
6026static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
6027 unsigned long *db)
6028{
6029 u32 dr6 = 0;
6030 int i;
6031 u32 enable, rwlen;
6032
6033 enable = dr7;
6034 rwlen = dr7 >> 16;
6035 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
6036 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
6037 dr6 |= (1 << i);
6038 return dr6;
6039}
6040
6041static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
6042{
6043 struct kvm_run *kvm_run = vcpu->run;
6044
6045 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
6046 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
6047 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
6048 kvm_run->debug.arch.exception = DB_VECTOR;
6049 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6050 *r = EMULATE_USER_EXIT;
6051 } else {
6052
6053
6054
6055
6056
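		/*
		 * Userspace is not single-stepping the guest: deliver the
		 * single-step trap to the guest as #DB with DR6.BS set.
		 */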
6057 vcpu->arch.dr6 &= ~15;
6058 vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
6059 kvm_queue_exception(vcpu, DB_VECTOR);
6060 }
6061}
6062
6063int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
6064{
6065 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6066 int r = EMULATE_DONE;
6067
6068 kvm_x86_ops->skip_emulated_instruction(vcpu);
6069
6070
6071
6072
6073
6074
6075
6076
6077
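	/*
	 * rflags was sampled before the skip: if TF was set, the guest
	 * expects a single-step #DB (or a KVM_EXIT_DEBUG if userspace is
	 * single-stepping) for the skipped instruction.
	 */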
6078 if (unlikely(rflags & X86_EFLAGS_TF))
6079 kvm_vcpu_do_singlestep(vcpu, &r);
6080 return r == EMULATE_DONE;
6081}
6082EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
6083
6084static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
6085{
6086 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
6087 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
6088 struct kvm_run *kvm_run = vcpu->run;
6089 unsigned long eip = kvm_get_linear_rip(vcpu);
6090 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6091 vcpu->arch.guest_debug_dr7,
6092 vcpu->arch.eff_db);
6093
6094 if (dr6 != 0) {
6095 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
6096 kvm_run->debug.arch.pc = eip;
6097 kvm_run->debug.arch.exception = DB_VECTOR;
6098 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6099 *r = EMULATE_USER_EXIT;
6100 return true;
6101 }
6102 }
6103
6104 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
6105 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
6106 unsigned long eip = kvm_get_linear_rip(vcpu);
6107 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6108 vcpu->arch.dr7,
6109 vcpu->arch.db);
6110
6111 if (dr6 != 0) {
6112 vcpu->arch.dr6 &= ~15;
6113 vcpu->arch.dr6 |= dr6 | DR6_RTM;
6114 kvm_queue_exception(vcpu, DB_VECTOR);
6115 *r = EMULATE_DONE;
6116 return true;
6117 }
6118 }
6119
6120 return false;
6121}
6122
6123static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
6124{
6125 switch (ctxt->opcode_len) {
6126 case 1:
6127 switch (ctxt->b) {
6128 case 0xe4:
6129 case 0xe5:
6130 case 0xec:
6131 case 0xed:
6132 case 0xe6:
6133 case 0xe7:
6134 case 0xee:
6135 case 0xef:
6136 case 0x6c:
6137 case 0x6d:
6138 case 0x6e:
6139 case 0x6f:
6140 return true;
6141 }
6142 break;
6143 case 2:
6144 switch (ctxt->b) {
6145 case 0x33:
6146 return true;
6147 }
6148 break;
6149 }
6150
6151 return false;
6152}
6153
6154int x86_emulate_instruction(struct kvm_vcpu *vcpu,
6155 unsigned long cr2,
6156 int emulation_type,
6157 void *insn,
6158 int insn_len)
6159{
6160 int r;
6161 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6162 bool writeback = true;
6163 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
6164
6165 vcpu->arch.l1tf_flush_l1d = true;
6166
6167
6168
6169
6170
6171 vcpu->arch.write_fault_to_shadow_pgtable = false;
6172 kvm_clear_exception_queue(vcpu);
6173
6174 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
6175 init_emulate_ctxt(vcpu);
6176
6177
6178
6179
6180
6181
6182
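		/*
		 * Honour armed code breakpoints (guest debug or the guest's
		 * own DR7) before decoding; a hit turns into KVM_EXIT_DEBUG
		 * or an injected #DB instead of emulation.
		 */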
6183 if (!(emulation_type & EMULTYPE_SKIP) &&
6184 kvm_vcpu_check_breakpoint(vcpu, &r))
6185 return r;
6186
6187 ctxt->interruptibility = 0;
6188 ctxt->have_exception = false;
6189 ctxt->exception.vector = -1;
6190 ctxt->perm_ok = false;
6191
6192 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
6193
6194 r = x86_decode_insn(ctxt, insn, insn_len);
6195
6196 trace_kvm_emulate_insn_start(vcpu);
6197 ++vcpu->stat.insn_emulation;
6198 if (r != EMULATION_OK) {
6199 if (emulation_type & EMULTYPE_TRAP_UD)
6200 return EMULATE_FAIL;
6201 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
6202 emulation_type))
6203 return EMULATE_DONE;
6204 if (ctxt->have_exception && inject_emulated_exception(vcpu))
6205 return EMULATE_DONE;
6206 if (emulation_type & EMULTYPE_SKIP)
6207 return EMULATE_FAIL;
6208 return handle_emulation_failure(vcpu, emulation_type);
6209 }
6210 }
6211
6212 if ((emulation_type & EMULTYPE_VMWARE) &&
6213 !is_vmware_backdoor_opcode(ctxt))
6214 return EMULATE_FAIL;
6215
6216 if (emulation_type & EMULTYPE_SKIP) {
6217 kvm_rip_write(vcpu, ctxt->_eip);
6218 if (ctxt->eflags & X86_EFLAGS_RF)
6219 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
6220 return EMULATE_DONE;
6221 }
6222
6223 if (retry_instruction(ctxt, cr2, emulation_type))
6224 return EMULATE_DONE;
6225
6226
6227
6228 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
6229 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6230 emulator_invalidate_register_cache(ctxt);
6231 }
6232
6233restart:
6234
6235 ctxt->exception.address = cr2;
6236
6237 r = x86_emulate_insn(ctxt);
6238
6239 if (r == EMULATION_INTERCEPTED)
6240 return EMULATE_DONE;
6241
6242 if (r == EMULATION_FAILED) {
6243 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
6244 emulation_type))
6245 return EMULATE_DONE;
6246
6247 return handle_emulation_failure(vcpu, emulation_type);
6248 }
6249
6250 if (ctxt->have_exception) {
6251 r = EMULATE_DONE;
6252 if (inject_emulated_exception(vcpu))
6253 return r;
6254 } else if (vcpu->arch.pio.count) {
6255 if (!vcpu->arch.pio.in) {
6256
6257 vcpu->arch.pio.count = 0;
6258 } else {
6259 writeback = false;
6260 vcpu->arch.complete_userspace_io = complete_emulated_pio;
6261 }
6262 r = EMULATE_USER_EXIT;
6263 } else if (vcpu->mmio_needed) {
6264 if (!vcpu->mmio_is_write)
6265 writeback = false;
6266 r = EMULATE_USER_EXIT;
6267 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6268 } else if (r == EMULATION_RESTART)
6269 goto restart;
6270 else
6271 r = EMULATE_DONE;
6272
6273 if (writeback) {
6274 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6275 toggle_interruptibility(vcpu, ctxt->interruptibility);
6276 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6277 kvm_rip_write(vcpu, ctxt->eip);
6278 if (r == EMULATE_DONE &&
6279 (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
6280 kvm_vcpu_do_singlestep(vcpu, &r);
6281 if (!ctxt->have_exception ||
6282 exception_type(ctxt->exception.vector) == EXCPT_TRAP)
6283 __kvm_set_rflags(vcpu, ctxt->eflags);
6284
6285
6286
6287
6288
6289
6290
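		/*
		 * If the emulated instruction just set EFLAGS.IF, an
		 * interrupt may now be deliverable; request an event check
		 * before the next VM entry.
		 */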
6291 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
6292 kvm_make_request(KVM_REQ_EVENT, vcpu);
6293 } else
6294 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
6295
6296 return r;
6297}
6298
6299int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
6300{
6301 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
6302}
6303EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
6304
6305int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
6306 void *insn, int insn_len)
6307{
6308 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
6309}
6310EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
6311
6312static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
6313 unsigned short port)
6314{
6315 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
6316 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
6317 size, port, &val, 1);
6318
6319 vcpu->arch.pio.count = 0;
6320 return ret;
6321}
6322
6323static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
6324{
6325 unsigned long val;
6326
6327
6328 BUG_ON(vcpu->arch.pio.count != 1);
6329
6330
6331 val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
6332 : 0;
6333
6334
6335
6336
6337
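	/*
	 * pio.count is still 1, so emulator_pio_in_emulated() simply copies
	 * the data userspace placed in pio_data and traces the access.
	 */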
6338 emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
6339 vcpu->arch.pio.port, &val, 1);
6340 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
6341
6342 return 1;
6343}
6344
6345static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
6346 unsigned short port)
6347{
6348 unsigned long val;
6349 int ret;
6350
6351
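	/*
	 * 1- and 2-byte port reads only replace the low bytes of RAX;
	 * 4-byte reads zero the upper half, so start from 0 in that case.
	 */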
6352 val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
6353
6354 ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
6355 &val, 1);
6356 if (ret) {
6357 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
6358 return ret;
6359 }
6360
6361 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
6362
6363 return 0;
6364}
6365
6366int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
6367{
6368 int ret = kvm_skip_emulated_instruction(vcpu);
6369
6370
6371
6372
6373
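	/*
	 * The PIO helpers return 0 when userspace must complete the I/O;
	 * combine with the skip result so a required exit is not lost.
	 */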
6374 if (in)
6375 return kvm_fast_pio_in(vcpu, size, port) && ret;
6376 else
6377 return kvm_fast_pio_out(vcpu, size, port) && ret;
6378}
6379EXPORT_SYMBOL_GPL(kvm_fast_pio);
6380
6381static int kvmclock_cpu_down_prep(unsigned int cpu)
6382{
6383 __this_cpu_write(cpu_tsc_khz, 0);
6384 return 0;
6385}
6386
6387static void tsc_khz_changed(void *data)
6388{
6389 struct cpufreq_freqs *freq = data;
6390 unsigned long khz = 0;
6391
6392 if (data)
6393 khz = freq->new;
6394 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
6395 khz = cpufreq_quick_get(raw_smp_processor_id());
6396 if (!khz)
6397 khz = tsc_khz;
6398 __this_cpu_write(cpu_tsc_khz, khz);
6399}
6400
6401#ifdef CONFIG_X86_64
6402static void kvm_hyperv_tsc_notifier(void)
6403{
6404 struct kvm *kvm;
6405 struct kvm_vcpu *vcpu;
6406 int cpu;
6407
6408 spin_lock(&kvm_lock);
6409 list_for_each_entry(kvm, &vm_list, vm_list)
6410 kvm_make_mclock_inprogress_request(kvm);
6411
6412 hyperv_stop_tsc_emulation();
6413
6414
6415 for_each_present_cpu(cpu)
6416 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
6417 kvm_max_guest_tsc_khz = tsc_khz;
6418
6419 list_for_each_entry(kvm, &vm_list, vm_list) {
6420 struct kvm_arch *ka = &kvm->arch;
6421
6422 spin_lock(&ka->pvclock_gtod_sync_lock);
6423
6424 pvclock_update_vm_gtod_copy(kvm);
6425
6426 kvm_for_each_vcpu(cpu, vcpu, kvm)
6427 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6428
6429 kvm_for_each_vcpu(cpu, vcpu, kvm)
6430 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
6431
6432 spin_unlock(&ka->pvclock_gtod_sync_lock);
6433 }
6434 spin_unlock(&kvm_lock);
6435}
6436#endif
6437
6438static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
6439 void *data)
6440{
6441 struct cpufreq_freqs *freq = data;
6442 struct kvm *kvm;
6443 struct kvm_vcpu *vcpu;
6444 int i, send_ipi = 0;
6445
6446
6447
6448
6449
6450
6451
6452
6453
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
6484
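	/*
	 * Without a constant TSC the guest-visible TSC rate follows the host
	 * CPU frequency.  React to a frequency increase before it takes
	 * effect and to a decrease only after it has taken effect, so guest
	 * time never appears to go backwards; then refresh cpu_tsc_khz on
	 * the affected CPU and ask each vCPU running there to update its
	 * kvmclock.
	 */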
6485 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
6486 return 0;
6487 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
6488 return 0;
6489
6490 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
6491
6492 spin_lock(&kvm_lock);
6493 list_for_each_entry(kvm, &vm_list, vm_list) {
6494 kvm_for_each_vcpu(i, vcpu, kvm) {
6495 if (vcpu->cpu != freq->cpu)
6496 continue;
6497 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6498 if (vcpu->cpu != smp_processor_id())
6499 send_ipi = 1;
6500 }
6501 }
6502 spin_unlock(&kvm_lock);
6503
6504 if (freq->old < freq->new && send_ipi) {
6505
6506
6507
6508
6509
6510
6511
6512
6513
6514
6515
6516
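		/*
		 * The frequency went up: a vCPU already in guest mode on that
		 * CPU could keep using the old, slower rate.  Kick the CPU
		 * with an IPI so the pending clock update is applied on the
		 * next guest entry.
		 */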
6517 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
6518 }
6519 return 0;
6520}
6521
6522static struct notifier_block kvmclock_cpufreq_notifier_block = {
6523 .notifier_call = kvmclock_cpufreq_notifier
6524};
6525
6526static int kvmclock_cpu_online(unsigned int cpu)
6527{
6528 tsc_khz_changed(NULL);
6529 return 0;
6530}
6531
6532static void kvm_timer_init(void)
6533{
6534 max_tsc_khz = tsc_khz;
6535
6536 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
6537#ifdef CONFIG_CPU_FREQ
6538 struct cpufreq_policy policy;
6539 int cpu;
6540
6541 memset(&policy, 0, sizeof(policy));
6542 cpu = get_cpu();
6543 cpufreq_get_policy(&policy, cpu);
6544 if (policy.cpuinfo.max_freq)
6545 max_tsc_khz = policy.cpuinfo.max_freq;
6546 put_cpu();
6547#endif
6548 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
6549 CPUFREQ_TRANSITION_NOTIFIER);
6550 }
6551 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
6552
6553 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
6554 kvmclock_cpu_online, kvmclock_cpu_down_prep);
6555}
6556
6557DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
6558EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
6559
6560int kvm_is_in_guest(void)
6561{
6562 return __this_cpu_read(current_vcpu) != NULL;
6563}
6564
6565static int kvm_is_user_mode(void)
6566{
6567 int user_mode = 3;
6568
6569 if (__this_cpu_read(current_vcpu))
6570 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
6571
6572 return user_mode != 0;
6573}
6574
6575static unsigned long kvm_get_guest_ip(void)
6576{
6577 unsigned long ip = 0;
6578
6579 if (__this_cpu_read(current_vcpu))
6580 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
6581
6582 return ip;
6583}
6584
6585static struct perf_guest_info_callbacks kvm_guest_cbs = {
6586 .is_in_guest = kvm_is_in_guest,
6587 .is_user_mode = kvm_is_user_mode,
6588 .get_guest_ip = kvm_get_guest_ip,
6589};
6590
6591static void kvm_set_mmio_spte_mask(void)
6592{
6593 u64 mask;
6594 int maxphyaddr = boot_cpu_data.x86_phys_bits;
6595
6596
6597
6598
6599
6600
6601
6602
6603
6604
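	/*
	 * Tag MMIO SPTEs with physical-address bit 51 (reserved as long as
	 * MAXPHYADDR < 52) plus the present bit, so that accesses through
	 * such SPTEs fault in a way KVM can recognize as MMIO.
	 */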
6605 mask = 1ull << 51;
6606
6607
6608 mask |= 1ull;
6609
6610
6611
6612
6613
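	/*
	 * With 52 physical address bits no high bit is reserved; clear the
	 * present bit instead so MMIO SPTEs stay non-present.
	 */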
6614 if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
6615 mask &= ~1ull;
6616
6617 kvm_mmu_set_mmio_spte_mask(mask, mask);
6618}
6619
6620#ifdef CONFIG_X86_64
6621static void pvclock_gtod_update_fn(struct work_struct *work)
6622{
6623 struct kvm *kvm;
6624
6625 struct kvm_vcpu *vcpu;
6626 int i;
6627
6628 spin_lock(&kvm_lock);
6629 list_for_each_entry(kvm, &vm_list, vm_list)
6630 kvm_for_each_vcpu(i, vcpu, kvm)
6631 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
6632 atomic_set(&kvm_guest_has_master_clock, 0);
6633 spin_unlock(&kvm_lock);
6634}
6635
6636static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
6637
6638
6639
6640
6641static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
6642 void *priv)
6643{
6644 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
6645 struct timekeeper *tk = priv;
6646
6647 update_pvclock_gtod(tk);
6648
6649
6650
6651
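	/*
	 * If the host switched to a clocksource that is not TSC-based, the
	 * master clock can no longer be used; queue work to make every VM
	 * recompute it.
	 */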
6652 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
6653 atomic_read(&kvm_guest_has_master_clock) != 0)
6654 queue_work(system_long_wq, &pvclock_gtod_work);
6655
6656 return 0;
6657}
6658
6659static struct notifier_block pvclock_gtod_notifier = {
6660 .notifier_call = pvclock_gtod_notify,
6661};
6662#endif
6663
6664int kvm_arch_init(void *opaque)
6665{
6666 int r;
6667 struct kvm_x86_ops *ops = opaque;
6668
6669 if (kvm_x86_ops) {
6670 printk(KERN_ERR "kvm: already loaded the other module\n");
6671 r = -EEXIST;
6672 goto out;
6673 }
6674
6675 if (!ops->cpu_has_kvm_support()) {
6676 printk(KERN_ERR "kvm: no hardware support\n");
6677 r = -EOPNOTSUPP;
6678 goto out;
6679 }
6680 if (ops->disabled_by_bios()) {
6681 printk(KERN_ERR "kvm: disabled by bios\n");
6682 r = -EOPNOTSUPP;
6683 goto out;
6684 }
6685
6686 r = -ENOMEM;
6687 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
6688 if (!shared_msrs) {
6689 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
6690 goto out;
6691 }
6692
6693 r = kvm_mmu_module_init();
6694 if (r)
6695 goto out_free_percpu;
6696
6697 kvm_set_mmio_spte_mask();
6698
6699 kvm_x86_ops = ops;
6700
6701 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
6702 PT_DIRTY_MASK, PT64_NX_MASK, 0,
6703 PT_PRESENT_MASK, 0, sme_me_mask);
6704 kvm_timer_init();
6705
6706 perf_register_guest_info_callbacks(&kvm_guest_cbs);
6707
6708 if (boot_cpu_has(X86_FEATURE_XSAVE))
6709 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
6710
6711 kvm_lapic_init();
6712#ifdef CONFIG_X86_64
6713 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
6714
6715 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
6716 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
6717#endif
6718
6719 return 0;
6720
6721out_free_percpu:
6722 free_percpu(shared_msrs);
6723out:
6724 return r;
6725}
6726
6727void kvm_arch_exit(void)
6728{
6729#ifdef CONFIG_X86_64
6730 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
6731 clear_hv_tscchange_cb();
6732#endif
6733 kvm_lapic_exit();
6734 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
6735
6736 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
6737 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
6738 CPUFREQ_TRANSITION_NOTIFIER);
6739 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
6740#ifdef CONFIG_X86_64
6741 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
6742#endif
6743 kvm_x86_ops = NULL;
6744 kvm_mmu_module_exit();
6745 free_percpu(shared_msrs);
6746}
6747
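/*
 * Halt the vCPU: with an in-kernel LAPIC we just record the HALTED mp_state
 * and stay in the kernel; otherwise report KVM_EXIT_HLT so userspace can
 * emulate the halt.
 */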
6748int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
6749{
6750 ++vcpu->stat.halt_exits;
6751 if (lapic_in_kernel(vcpu)) {
6752 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
6753 return 1;
6754 } else {
6755 vcpu->run->exit_reason = KVM_EXIT_HLT;
6756 return 0;
6757 }
6758}
6759EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
6760
6761int kvm_emulate_halt(struct kvm_vcpu *vcpu)
6762{
6763 int ret = kvm_skip_emulated_instruction(vcpu);
6764
	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
6768 return kvm_vcpu_halt(vcpu) && ret;
6769}
6770EXPORT_SYMBOL_GPL(kvm_emulate_halt);
6771
6772#ifdef CONFIG_X86_64
6773static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
6774 unsigned long clock_type)
6775{
6776 struct kvm_clock_pairing clock_pairing;
6777 struct timespec64 ts;
6778 u64 cycle;
6779 int ret;
6780
6781 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
6782 return -KVM_EOPNOTSUPP;
6783
6784 if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
6785 return -KVM_EOPNOTSUPP;
6786
6787 clock_pairing.sec = ts.tv_sec;
6788 clock_pairing.nsec = ts.tv_nsec;
6789 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
6790 clock_pairing.flags = 0;
6791
6792 ret = 0;
6793 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
6794 sizeof(struct kvm_clock_pairing)))
6795 ret = -KVM_EFAULT;
6796
6797 return ret;
6798}
6799#endif
6800
/*
 * kvm_pv_kick_cpu_op:  Kick a vcpu.
 *
 * @apicid - apicid of vcpu to be kicked.
 */
6806static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
6807{
6808 struct kvm_lapic_irq lapic_irq;
6809
6810 lapic_irq.shorthand = 0;
6811 lapic_irq.dest_mode = 0;
6812 lapic_irq.level = 0;
6813 lapic_irq.dest_id = apicid;
6814 lapic_irq.msi_redir_hint = false;
6815
6816 lapic_irq.delivery_mode = APIC_DM_REMRD;
6817 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
6818}
6819
6820void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
6821{
6822 vcpu->arch.apicv_active = false;
6823 kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
6824}
6825
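/*
 * Guest hypercall entry point.  The hypercall number is in RAX and the
 * arguments in RBX, RCX, RDX and RSI; outside 64-bit mode they are
 * truncated to 32 bits.  Hypercalls from CPL > 0 fail with -KVM_EPERM,
 * and the result is returned to the guest in RAX.
 */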
6826int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
6827{
6828 unsigned long nr, a0, a1, a2, a3, ret;
6829 int op_64_bit;
6830
6831 if (kvm_hv_hypercall_enabled(vcpu->kvm))
6832 return kvm_hv_hypercall(vcpu);
6833
6834 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
6835 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
6836 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
6837 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
6838 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
6839
6840 trace_kvm_hypercall(nr, a0, a1, a2, a3);
6841
6842 op_64_bit = is_64_bit_mode(vcpu);
6843 if (!op_64_bit) {
6844 nr &= 0xFFFFFFFF;
6845 a0 &= 0xFFFFFFFF;
6846 a1 &= 0xFFFFFFFF;
6847 a2 &= 0xFFFFFFFF;
6848 a3 &= 0xFFFFFFFF;
6849 }
6850
6851 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
6852 ret = -KVM_EPERM;
6853 goto out;
6854 }
6855
6856 switch (nr) {
6857 case KVM_HC_VAPIC_POLL_IRQ:
6858 ret = 0;
6859 break;
6860 case KVM_HC_KICK_CPU:
6861 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
6862 ret = 0;
6863 break;
6864#ifdef CONFIG_X86_64
6865 case KVM_HC_CLOCK_PAIRING:
6866 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
6867 break;
6868 case KVM_HC_SEND_IPI:
6869 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
6870 break;
6871#endif
6872 default:
6873 ret = -KVM_ENOSYS;
6874 break;
6875 }
6876out:
6877 if (!op_64_bit)
6878 ret = (u32)ret;
6879 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
6880
6881 ++vcpu->stat.hypercalls;
6882 return kvm_skip_emulated_instruction(vcpu);
6883}
6884EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
6885
6886static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
6887{
6888 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6889 char instruction[3];
6890 unsigned long rip = kvm_rip_read(vcpu);
6891
6892 kvm_x86_ops->patch_hypercall(vcpu, instruction);
6893
6894 return emulator_write_emulated(ctxt, rip, instruction, 3,
6895 &ctxt->exception);
6896}
6897
6898static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
6899{
6900 return vcpu->run->request_interrupt_window &&
6901 likely(!pic_in_kernel(vcpu->kvm));
6902}
6903
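/*
 * Mirror volatile vCPU state (IF flag, SMM flag, CR8, APIC base and
 * interrupt-injection readiness) into the kvm_run area so userspace sees
 * it after every exit.
 */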
6904static void post_kvm_run_save(struct kvm_vcpu *vcpu)
6905{
6906 struct kvm_run *kvm_run = vcpu->run;
6907
6908 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
6909 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
6910 kvm_run->cr8 = kvm_get_cr8(vcpu);
6911 kvm_run->apic_base = kvm_get_apic_base(vcpu);
6912 kvm_run->ready_for_interrupt_injection =
6913 pic_in_kernel(vcpu->kvm) ||
6914 kvm_vcpu_ready_for_interrupt_injection(vcpu);
6915}
6916
6917static void update_cr8_intercept(struct kvm_vcpu *vcpu)
6918{
6919 int max_irr, tpr;
6920
6921 if (!kvm_x86_ops->update_cr8_intercept)
6922 return;
6923
6924 if (!lapic_in_kernel(vcpu))
6925 return;
6926
6927 if (vcpu->arch.apicv_active)
6928 return;
6929
6930 if (!vcpu->arch.apic->vapic_addr)
6931 max_irr = kvm_lapic_find_highest_irr(vcpu);
6932 else
6933 max_irr = -1;
6934
6935 if (max_irr != -1)
6936 max_irr >>= 4;
6937
6938 tpr = kvm_lapic_get_cr8(vcpu);
6939
6940 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
6941}
6942
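/*
 * Re-inject events left over from a previous vmexit first, then deliver
 * new ones in priority order: exceptions, SMIs, NMIs and finally maskable
 * interrupts.  A non-zero return asks the caller to request an immediate
 * exit.
 */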
6943static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
6944{
6945 int r;
6946
	/* try to reinject previous events if any */
6949 if (vcpu->arch.exception.injected)
6950 kvm_x86_ops->queue_exception(vcpu);
6951
	/*
	 * Do not inject an NMI or interrupt if there is a pending
	 * exception.  Exceptions and interrupts are recognized at
	 * instruction boundaries, i.e. the start of an instruction.
	 * Trap-like exceptions, e.g. #DB, have higher priority than
	 * interrupts and NMIs, i.e. traps are recognized before an
	 * interrupt/NMI that's pending on the same instruction.
	 * Fault-like exceptions, e.g. #GP and #PF, are the lowest
	 * priority, i.e. a pending fault-like exception means the
	 * fault occurred on the *previous* instruction and must be
	 * serviced prior to recognizing any new events in order to
	 * fully complete the previous instruction.
	 */
6965 else if (!vcpu->arch.exception.pending) {
6966 if (vcpu->arch.nmi_injected)
6967 kvm_x86_ops->set_nmi(vcpu);
6968 else if (vcpu->arch.interrupt.injected)
6969 kvm_x86_ops->set_irq(vcpu);
6970 }
6971
	/*
	 * Call check_nested_events() even if we reinjected a previous event
	 * so that the caller can determine whether an immediate exit from
	 * L2 to L1 is required due to pending L1 events.
	 */
6978 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
6979 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
6980 if (r != 0)
6981 return r;
6982 }
6983
	/* try to inject a new event if one is pending */
6985 if (vcpu->arch.exception.pending) {
6986 trace_kvm_inj_exception(vcpu->arch.exception.nr,
6987 vcpu->arch.exception.has_error_code,
6988 vcpu->arch.exception.error_code);
6989
6990 WARN_ON_ONCE(vcpu->arch.exception.injected);
6991 vcpu->arch.exception.pending = false;
6992 vcpu->arch.exception.injected = true;
6993
6994 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
6995 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
6996 X86_EFLAGS_RF);
6997
6998 if (vcpu->arch.exception.nr == DB_VECTOR &&
6999 (vcpu->arch.dr7 & DR7_GD)) {
7000 vcpu->arch.dr7 &= ~DR7_GD;
7001 kvm_update_dr7(vcpu);
7002 }
7003
7004 kvm_x86_ops->queue_exception(vcpu);
7005 }
7006
	/* Don't consider a new event if we re-injected an event */
7008 if (kvm_event_needs_reinjection(vcpu))
7009 return 0;
7010
7011 if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
7012 kvm_x86_ops->smi_allowed(vcpu)) {
7013 vcpu->arch.smi_pending = false;
7014 ++vcpu->arch.smi_count;
7015 enter_smm(vcpu);
7016 } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
7017 --vcpu->arch.nmi_pending;
7018 vcpu->arch.nmi_injected = true;
7019 kvm_x86_ops->set_nmi(vcpu);
7020 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
		/*
		 * Because interrupts can be injected asynchronously,
		 * check_nested_events() is called again here to avoid a
		 * race condition before deciding whether the interrupt can
		 * actually be injected.
		 */
7028 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7029 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
7030 if (r != 0)
7031 return r;
7032 }
7033 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
7034 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
7035 false);
7036 kvm_x86_ops->set_irq(vcpu);
7037 }
7038 }
7039
7040 return 0;
7041}
7042
7043static void process_nmi(struct kvm_vcpu *vcpu)
7044{
7045 unsigned limit = 2;
7046
	/*
	 * x86 is limited to one NMI running, and one NMI pending after it.
	 * If an NMI is already in progress, limit further NMIs to just one.
	 * Otherwise, allow two (and we'll inject the first one immediately).
	 */
7052 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
7053 limit = 1;
7054
7055 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
7056 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
7057 kvm_make_request(KVM_REQ_EVENT, vcpu);
7058}
7059
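/*
 * Pack the kvm_segment attribute bits into the 32-bit descriptor flag
 * layout used by the SMM state-save area.
 */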
7060static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
7061{
7062 u32 flags = 0;
7063 flags |= seg->g << 23;
7064 flags |= seg->db << 22;
7065 flags |= seg->l << 21;
7066 flags |= seg->avl << 20;
7067 flags |= seg->present << 15;
7068 flags |= seg->dpl << 13;
7069 flags |= seg->s << 12;
7070 flags |= seg->type << 8;
7071 return flags;
7072}
7073
7074static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
7075{
7076 struct kvm_segment seg;
7077 int offset;
7078
7079 kvm_get_segment(vcpu, &seg, n);
7080 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
7081
7082 if (n < 3)
7083 offset = 0x7f84 + n * 12;
7084 else
7085 offset = 0x7f2c + (n - 3) * 12;
7086
7087 put_smstate(u32, buf, offset + 8, seg.base);
7088 put_smstate(u32, buf, offset + 4, seg.limit);
7089 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
7090}
7091
7092#ifdef CONFIG_X86_64
7093static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
7094{
7095 struct kvm_segment seg;
7096 int offset;
7097 u16 flags;
7098
7099 kvm_get_segment(vcpu, &seg, n);
7100 offset = 0x7e00 + n * 16;
7101
7102 flags = enter_smm_get_segment_flags(&seg) >> 8;
7103 put_smstate(u16, buf, offset, seg.selector);
7104 put_smstate(u16, buf, offset + 2, flags);
7105 put_smstate(u32, buf, offset + 4, seg.limit);
7106 put_smstate(u64, buf, offset + 8, seg.base);
7107}
7108#endif
7109
7110static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
7111{
7112 struct desc_ptr dt;
7113 struct kvm_segment seg;
7114 unsigned long val;
7115 int i;
7116
7117 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
7118 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
7119 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
7120 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
7121
7122 for (i = 0; i < 8; i++)
7123 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
7124
7125 kvm_get_dr(vcpu, 6, &val);
7126 put_smstate(u32, buf, 0x7fcc, (u32)val);
7127 kvm_get_dr(vcpu, 7, &val);
7128 put_smstate(u32, buf, 0x7fc8, (u32)val);
7129
7130 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7131 put_smstate(u32, buf, 0x7fc4, seg.selector);
7132 put_smstate(u32, buf, 0x7f64, seg.base);
7133 put_smstate(u32, buf, 0x7f60, seg.limit);
7134 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
7135
7136 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7137 put_smstate(u32, buf, 0x7fc0, seg.selector);
7138 put_smstate(u32, buf, 0x7f80, seg.base);
7139 put_smstate(u32, buf, 0x7f7c, seg.limit);
7140 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
7141
7142 kvm_x86_ops->get_gdt(vcpu, &dt);
7143 put_smstate(u32, buf, 0x7f74, dt.address);
7144 put_smstate(u32, buf, 0x7f70, dt.size);
7145
7146 kvm_x86_ops->get_idt(vcpu, &dt);
7147 put_smstate(u32, buf, 0x7f58, dt.address);
7148 put_smstate(u32, buf, 0x7f54, dt.size);
7149
7150 for (i = 0; i < 6; i++)
7151 enter_smm_save_seg_32(vcpu, buf, i);
7152
7153 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
7154
	/* revision id */
7156 put_smstate(u32, buf, 0x7efc, 0x00020000);
7157 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
7158}
7159
7160static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
7161{
7162#ifdef CONFIG_X86_64
7163 struct desc_ptr dt;
7164 struct kvm_segment seg;
7165 unsigned long val;
7166 int i;
7167
7168 for (i = 0; i < 16; i++)
7169 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
7170
7171 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
7172 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
7173
7174 kvm_get_dr(vcpu, 6, &val);
7175 put_smstate(u64, buf, 0x7f68, val);
7176 kvm_get_dr(vcpu, 7, &val);
7177 put_smstate(u64, buf, 0x7f60, val);
7178
7179 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
7180 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
7181 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
7182
7183 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
7184
	/* revision id */
7186 put_smstate(u32, buf, 0x7efc, 0x00020064);
7187
7188 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
7189
7190 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7191 put_smstate(u16, buf, 0x7e90, seg.selector);
7192 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
7193 put_smstate(u32, buf, 0x7e94, seg.limit);
7194 put_smstate(u64, buf, 0x7e98, seg.base);
7195
7196 kvm_x86_ops->get_idt(vcpu, &dt);
7197 put_smstate(u32, buf, 0x7e84, dt.size);
7198 put_smstate(u64, buf, 0x7e88, dt.address);
7199
7200 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7201 put_smstate(u16, buf, 0x7e70, seg.selector);
7202 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
7203 put_smstate(u32, buf, 0x7e74, seg.limit);
7204 put_smstate(u64, buf, 0x7e78, seg.base);
7205
7206 kvm_x86_ops->get_gdt(vcpu, &dt);
7207 put_smstate(u32, buf, 0x7e64, dt.size);
7208 put_smstate(u64, buf, 0x7e68, dt.address);
7209
7210 for (i = 0; i < 6; i++)
7211 enter_smm_save_seg_64(vcpu, buf, i);
7212#else
7213 WARN_ON_ONCE(1);
7214#endif
7215}
7216
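/*
 * Emulate SMM entry: save the register state into the state-save area at
 * smbase + 0xfe00, then switch the vCPU to the SMM startup environment:
 * flat 4 GiB segments based at smbase, RIP = 0x8000, RFLAGS fixed, and
 * CR0 paging/protection bits cleared.
 */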
7217static void enter_smm(struct kvm_vcpu *vcpu)
7218{
7219 struct kvm_segment cs, ds;
7220 struct desc_ptr dt;
7221 char buf[512];
7222 u32 cr0;
7223
7224 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
7225 memset(buf, 0, 512);
7226 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7227 enter_smm_save_state_64(vcpu, buf);
7228 else
7229 enter_smm_save_state_32(vcpu, buf);
7230
	/*
	 * Give pre_enter_smm() a chance to make ISA-specific changes to the
	 * vCPU state (e.g. leave guest mode) after we've saved the state into
	 * the SMM state-save area.
	 */
7236 kvm_x86_ops->pre_enter_smm(vcpu, buf);
7237
7238 vcpu->arch.hflags |= HF_SMM_MASK;
7239 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
7240
7241 if (kvm_x86_ops->get_nmi_mask(vcpu))
7242 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
7243 else
7244 kvm_x86_ops->set_nmi_mask(vcpu, true);
7245
7246 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
7247 kvm_rip_write(vcpu, 0x8000);
7248
7249 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
7250 kvm_x86_ops->set_cr0(vcpu, cr0);
7251 vcpu->arch.cr0 = cr0;
7252
7253 kvm_x86_ops->set_cr4(vcpu, 0);
7254
	/* Undocumented: the IDT limit is set to zero on entry to SMM. */
7256 dt.address = dt.size = 0;
7257 kvm_x86_ops->set_idt(vcpu, &dt);
7258
7259 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
7260
7261 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
7262 cs.base = vcpu->arch.smbase;
7263
7264 ds.selector = 0;
7265 ds.base = 0;
7266
7267 cs.limit = ds.limit = 0xffffffff;
7268 cs.type = ds.type = 0x3;
7269 cs.dpl = ds.dpl = 0;
7270 cs.db = ds.db = 0;
7271 cs.s = ds.s = 1;
7272 cs.l = ds.l = 0;
7273 cs.g = ds.g = 1;
7274 cs.avl = ds.avl = 0;
7275 cs.present = ds.present = 1;
7276 cs.unusable = ds.unusable = 0;
7277 cs.padding = ds.padding = 0;
7278
7279 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7280 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
7281 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
7282 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
7283 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
7284 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
7285
7286 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7287 kvm_x86_ops->set_efer(vcpu, 0);
7288
7289 kvm_update_cpuid(vcpu);
7290 kvm_mmu_reset_context(vcpu);
7291}
7292
7293static void process_smi(struct kvm_vcpu *vcpu)
7294{
7295 vcpu->arch.smi_pending = true;
7296 kvm_make_request(KVM_REQ_EVENT, vcpu);
7297}
7298
7299void kvm_make_scan_ioapic_request(struct kvm *kvm)
7300{
7301 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
7302}
7303
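/*
 * Recompute which vectors need an EOI intercept so that IOAPIC (or
 * userspace IOAPIC) EOIs can be forwarded; the resulting bitmap is loaded
 * via KVM_REQ_LOAD_EOI_EXITMAP, or deferred while in guest mode.
 */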
7304static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
7305{
7306 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
7307 return;
7308
7309 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
7310
7311 if (irqchip_split(vcpu->kvm))
7312 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
7313 else {
7314 if (vcpu->arch.apicv_active)
7315 kvm_x86_ops->sync_pir_to_irr(vcpu);
7316 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
7317 }
7318
7319 if (is_guest_mode(vcpu))
7320 vcpu->arch.load_eoi_exitmap_pending = true;
7321 else
7322 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
7323}
7324
7325static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
7326{
7327 u64 eoi_exit_bitmap[4];
7328
7329 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
7330 return;
7331
7332 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
7333 vcpu_to_synic(vcpu)->vec_bitmap, 256);
7334 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
7335}
7336
7337int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
7338 unsigned long start, unsigned long end,
7339 bool blockable)
7340{
7341 unsigned long apic_address;
7342
	/*
	 * The physical address of the apic access page is stored in the VMCS.
	 * Update it when it becomes invalid.
	 */
7347 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7348 if (start <= apic_address && apic_address < end)
7349 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
7350
7351 return 0;
7352}
7353
7354void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
7355{
7356 struct page *page = NULL;
7357
7358 if (!lapic_in_kernel(vcpu))
7359 return;
7360
7361 if (!kvm_x86_ops->set_apic_access_page_addr)
7362 return;
7363
7364 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7365 if (is_error_page(page))
7366 return;
7367 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
7368
	/*
	 * Do not pin the apic access page in memory; the MMU notifier
	 * will call us again if it is migrated or swapped out.
	 */
7373 put_page(page);
7374}
7375EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
7376
7377void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
7378{
7379 smp_send_reschedule(vcpu->cpu);
7380}
7381EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
7382
/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to the userspace.  Otherwise, the value will be returned to the
 * userspace.
 */
7388static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
7389{
7390 int r;
7391 bool req_int_win =
7392 dm_request_for_irq_injection(vcpu) &&
7393 kvm_cpu_accept_dm_intr(vcpu);
7394
7395 bool req_immediate_exit = false;
7396
7397 if (kvm_request_pending(vcpu)) {
7398 if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
7399 kvm_x86_ops->get_vmcs12_pages(vcpu);
7400 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
7401 kvm_mmu_unload(vcpu);
7402 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
7403 __kvm_migrate_timers(vcpu);
7404 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
7405 kvm_gen_update_masterclock(vcpu->kvm);
7406 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
7407 kvm_gen_kvmclock_update(vcpu);
7408 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
7409 r = kvm_guest_time_update(vcpu);
7410 if (unlikely(r))
7411 goto out;
7412 }
7413 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
7414 kvm_mmu_sync_roots(vcpu);
7415 if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
7416 kvm_mmu_load_cr3(vcpu);
7417 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
7418 kvm_vcpu_flush_tlb(vcpu, true);
7419 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
7420 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
7421 r = 0;
7422 goto out;
7423 }
7424 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
7425 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
7426 vcpu->mmio_needed = 0;
7427 r = 0;
7428 goto out;
7429 }
7430 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* Page is swapped out. Do synthetic halt */
7432 vcpu->arch.apf.halted = true;
7433 r = 1;
7434 goto out;
7435 }
7436 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
7437 record_steal_time(vcpu);
7438 if (kvm_check_request(KVM_REQ_SMI, vcpu))
7439 process_smi(vcpu);
7440 if (kvm_check_request(KVM_REQ_NMI, vcpu))
7441 process_nmi(vcpu);
7442 if (kvm_check_request(KVM_REQ_PMU, vcpu))
7443 kvm_pmu_handle_event(vcpu);
7444 if (kvm_check_request(KVM_REQ_PMI, vcpu))
7445 kvm_pmu_deliver_pmi(vcpu);
7446 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
7447 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
7448 if (test_bit(vcpu->arch.pending_ioapic_eoi,
7449 vcpu->arch.ioapic_handled_vectors)) {
7450 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
7451 vcpu->run->eoi.vector =
7452 vcpu->arch.pending_ioapic_eoi;
7453 r = 0;
7454 goto out;
7455 }
7456 }
7457 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
7458 vcpu_scan_ioapic(vcpu);
7459 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
7460 vcpu_load_eoi_exitmap(vcpu);
7461 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
7462 kvm_vcpu_reload_apic_access_page(vcpu);
7463 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
7464 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
7465 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
7466 r = 0;
7467 goto out;
7468 }
7469 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
7470 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
7471 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
7472 r = 0;
7473 goto out;
7474 }
7475 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
7476 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
7477 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
7478 r = 0;
7479 goto out;
7480 }
7481
		/*
		 * KVM_REQ_HV_STIMER has to be processed after
		 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
		 * depend on the guest clock being up-to-date.
		 */
7487 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
7488 kvm_hv_process_stimers(vcpu);
7489 }
7490
7491 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
7492 ++vcpu->stat.req_event;
7493 kvm_apic_accept_events(vcpu);
7494 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
7495 r = 1;
7496 goto out;
7497 }
7498
7499 if (inject_pending_event(vcpu, req_int_win) != 0)
7500 req_immediate_exit = true;
7501 else {
			/*
			 * Enable SMI/NMI/IRQ window open exits if needed.
			 *
			 * SMIs have three cases:
			 * 1) They can be nested, and then there is nothing to
			 *    do here because RSM will cause a vmexit anyway.
			 * 2) There is an ISA-specific reason why SMI cannot be
			 *    injected, and the moment when this changes can be
			 *    intercepted.
			 * 3) Or the SMI can be pending because
			 *    inject_pending_event has completed the injection
			 *    of an IRQ or NMI from the previous vmexit, and
			 *    then we request an immediate exit to inject the
			 *    SMI.
			 */
7516 if (vcpu->arch.smi_pending && !is_smm(vcpu))
7517 if (!kvm_x86_ops->enable_smi_window(vcpu))
7518 req_immediate_exit = true;
7519 if (vcpu->arch.nmi_pending)
7520 kvm_x86_ops->enable_nmi_window(vcpu);
7521 if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
7522 kvm_x86_ops->enable_irq_window(vcpu);
7523 WARN_ON(vcpu->arch.exception.pending);
7524 }
7525
7526 if (kvm_lapic_enabled(vcpu)) {
7527 update_cr8_intercept(vcpu);
7528 kvm_lapic_sync_to_vapic(vcpu);
7529 }
7530 }
7531
7532 r = kvm_mmu_reload(vcpu);
7533 if (unlikely(r)) {
7534 goto cancel_injection;
7535 }
7536
7537 preempt_disable();
7538
7539 kvm_x86_ops->prepare_guest_switch(vcpu);
7540
	/*
	 * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
	 * IPIs are then delayed after guest entry, which ensures that they
	 * result in virtual interrupt delivery.
	 */
7546 local_irq_disable();
7547 vcpu->mode = IN_GUEST_MODE;
7548
7549 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
7550
	/*
	 * 1) We should set ->mode before checking ->requests.  Please see
	 * the comment in kvm_vcpu_exiting_guest_mode().
	 *
	 * 2) For APICv, we should set ->mode before checking PIR.ON.  This
	 * pairs with the memory barrier implicit in pi_test_and_set_on
	 * (see vmx_deliver_posted_interrupt).
	 *
	 * 3) This also orders the write to mode from any reads to the page
	 * tables done while the VCPU is running.  Please see the comment
	 * in kvm_flush_remote_tlbs.
	 */
7563 smp_mb__after_srcu_read_unlock();
7564
	/*
	 * This handles the case where a posted interrupt was
	 * notified with kvm_vcpu_kick.
	 */
7569 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
7570 kvm_x86_ops->sync_pir_to_irr(vcpu);
7571
7572 if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
7573 || need_resched() || signal_pending(current)) {
7574 vcpu->mode = OUTSIDE_GUEST_MODE;
7575 smp_wmb();
7576 local_irq_enable();
7577 preempt_enable();
7578 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
7579 r = 1;
7580 goto cancel_injection;
7581 }
7582
7583 kvm_load_guest_xcr0(vcpu);
7584
7585 if (req_immediate_exit) {
7586 kvm_make_request(KVM_REQ_EVENT, vcpu);
7587 kvm_x86_ops->request_immediate_exit(vcpu);
7588 }
7589
7590 trace_kvm_entry(vcpu->vcpu_id);
7591 if (lapic_timer_advance_ns)
7592 wait_lapic_expire(vcpu);
7593 guest_enter_irqoff();
7594
7595 if (unlikely(vcpu->arch.switch_db_regs)) {
7596 set_debugreg(0, 7);
7597 set_debugreg(vcpu->arch.eff_db[0], 0);
7598 set_debugreg(vcpu->arch.eff_db[1], 1);
7599 set_debugreg(vcpu->arch.eff_db[2], 2);
7600 set_debugreg(vcpu->arch.eff_db[3], 3);
7601 set_debugreg(vcpu->arch.dr6, 6);
7602 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
7603 }
7604
7605 kvm_x86_ops->run(vcpu);
7606
	/*
	 * Do this here before restoring debug registers on the host.  And
	 * since we do this before handling the vmexit, a DR access vmexit
	 * can (a) read the correct value of the debug registers, (b) set
	 * KVM_DEBUGREG_WONT_EXIT again.
	 */
7613 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
7614 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
7615 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
7616 kvm_update_dr0123(vcpu);
7617 kvm_update_dr6(vcpu);
7618 kvm_update_dr7(vcpu);
7619 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
7620 }
7621
	/*
	 * If the guest has used debug registers, at least dr7 will have been
	 * disabled on the way back to the host; restore the host's hardware
	 * breakpoint state here if any breakpoints are active.
	 */
7629 if (hw_breakpoint_active())
7630 hw_breakpoint_restore();
7631
7632 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
7633
7634 vcpu->mode = OUTSIDE_GUEST_MODE;
7635 smp_wmb();
7636
7637 kvm_put_guest_xcr0(vcpu);
7638
7639 kvm_before_interrupt(vcpu);
7640 kvm_x86_ops->handle_external_intr(vcpu);
7641 kvm_after_interrupt(vcpu);
7642
7643 ++vcpu->stat.exits;
7644
7645 guest_exit_irqoff();
7646
7647 local_irq_enable();
7648 preempt_enable();
7649
7650 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
7651
	/*
	 * Profile KVM exit RIPs:
	 */
7655 if (unlikely(prof_on == KVM_PROFILING)) {
7656 unsigned long rip = kvm_rip_read(vcpu);
7657 profile_hit(KVM_PROFILING, (void *)rip);
7658 }
7659
7660 if (unlikely(vcpu->arch.tsc_always_catchup))
7661 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7662
7663 if (vcpu->arch.apic_attention)
7664 kvm_lapic_sync_from_vapic(vcpu);
7665
7666 vcpu->arch.gpa_available = false;
7667 r = kvm_x86_ops->handle_exit(vcpu);
7668 return r;
7669
7670cancel_injection:
7671 kvm_x86_ops->cancel_injection(vcpu);
7672 if (unlikely(vcpu->arch.apic_attention))
7673 kvm_lapic_sync_from_vapic(vcpu);
7674out:
7675 return r;
7676}
7677
7678static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
7679{
7680 if (!kvm_arch_vcpu_runnable(vcpu) &&
7681 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
7682 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
7683 kvm_vcpu_block(vcpu);
7684 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
7685
7686 if (kvm_x86_ops->post_block)
7687 kvm_x86_ops->post_block(vcpu);
7688
7689 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
7690 return 1;
7691 }
7692
7693 kvm_apic_accept_events(vcpu);
7694 switch(vcpu->arch.mp_state) {
7695 case KVM_MP_STATE_HALTED:
7696 vcpu->arch.pv.pv_unhalted = false;
7697 vcpu->arch.mp_state =
7698 KVM_MP_STATE_RUNNABLE;
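		/* fall through */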
7699 case KVM_MP_STATE_RUNNABLE:
7700 vcpu->arch.apf.halted = false;
7701 break;
7702 case KVM_MP_STATE_INIT_RECEIVED:
7703 break;
7704 default:
		return -EINTR;
7707 }
7708 return 1;
7709}
7710
7711static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
7712{
7713 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7714 kvm_x86_ops->check_nested_events(vcpu, false);
7715
7716 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7717 !vcpu->arch.apf.halted);
7718}
7719
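/*
 * Outer vCPU loop: alternate between vcpu_enter_guest() and vcpu_block()
 * until a guest exit needs userspace attention (r <= 0), a signal is
 * pending, or userspace asked for an interrupt-window exit.
 */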
7720static int vcpu_run(struct kvm_vcpu *vcpu)
7721{
7722 int r;
7723 struct kvm *kvm = vcpu->kvm;
7724
7725 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
7726 vcpu->arch.l1tf_flush_l1d = true;
7727
7728 for (;;) {
7729 if (kvm_vcpu_running(vcpu)) {
7730 r = vcpu_enter_guest(vcpu);
7731 } else {
7732 r = vcpu_block(kvm, vcpu);
7733 }
7734
7735 if (r <= 0)
7736 break;
7737
7738 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
7739 if (kvm_cpu_has_pending_timer(vcpu))
7740 kvm_inject_pending_timer_irqs(vcpu);
7741
7742 if (dm_request_for_irq_injection(vcpu) &&
7743 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
7744 r = 0;
7745 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
7746 ++vcpu->stat.request_irq_exits;
7747 break;
7748 }
7749
7750 kvm_check_async_pf_completion(vcpu);
7751
7752 if (signal_pending(current)) {
7753 r = -EINTR;
7754 vcpu->run->exit_reason = KVM_EXIT_INTR;
7755 ++vcpu->stat.signal_exits;
7756 break;
7757 }
7758 if (need_resched()) {
7759 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
7760 cond_resched();
7761 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
7762 }
7763 }
7764
7765 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
7766
7767 return r;
7768}
7769
7770static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
7771{
7772 int r;
7773 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
7774 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
7775 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
7776 if (r != EMULATE_DONE)
7777 return 0;
7778 return 1;
7779}
7780
7781static int complete_emulated_pio(struct kvm_vcpu *vcpu)
7782{
7783 BUG_ON(!vcpu->arch.pio.count);
7784
7785 return complete_emulated_io(vcpu);
7786}
7787
/*
 * Implements the following, as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit
 */
7806static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7807{
7808 struct kvm_run *run = vcpu->run;
7809 struct kvm_mmio_fragment *frag;
7810 unsigned len;
7811
7812 BUG_ON(!vcpu->mmio_needed);
7813
	/* Complete previous fragment */
7815 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
7816 len = min(8u, frag->len);
7817 if (!vcpu->mmio_is_write)
7818 memcpy(frag->data, run->mmio.data, len);
7819
7820 if (frag->len <= 8) {
		/* Switch to the next fragment. */
7822 frag++;
7823 vcpu->mmio_cur_fragment++;
7824 } else {
		/* Go forward to the next mmio piece. */
7826 frag->data += len;
7827 frag->gpa += len;
7828 frag->len -= len;
7829 }
7830
7831 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
7832 vcpu->mmio_needed = 0;
7833
		/* FIXME: return into emulator if single-stepping. */
7835 if (vcpu->mmio_is_write)
7836 return 1;
7837 vcpu->mmio_read_completed = 1;
7838 return complete_emulated_io(vcpu);
7839 }
7840
7841 run->exit_reason = KVM_EXIT_MMIO;
7842 run->mmio.phys_addr = frag->gpa;
7843 if (vcpu->mmio_is_write)
7844 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
7845 run->mmio.len = min(8u, frag->len);
7846 run->mmio.is_write = vcpu->mmio_is_write;
7847 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
7848 return 0;
7849}
7850
/* Swap (qemu) user FPU context for the guest FPU context. */
7852static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
7853{
7854 preempt_disable();
7855 copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
	/* PKRU is separately restored in kvm_x86_ops->run. */
7857 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
7858 ~XFEATURE_MASK_PKRU);
7859 preempt_enable();
7860 trace_kvm_fpu(1);
7861}
7862
/* When vcpu_run ends, restore the userspace FPU context. */
7864static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7865{
7866 preempt_disable();
7867 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
7868 copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
7869 preempt_enable();
7870 ++vcpu->stat.fpu_reload;
7871 trace_kvm_fpu(0);
7872}
7873
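/*
 * KVM_RUN entry point: load the guest FPU and signal mask, sync any dirty
 * shared regs, finish a pending userspace I/O completion, then run the
 * vCPU loop (or return -EINTR when immediate_exit is set).
 */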
7874int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7875{
7876 int r;
7877
7878 vcpu_load(vcpu);
7879 kvm_sigset_activate(vcpu);
7880 kvm_load_guest_fpu(vcpu);
7881
7882 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7883 if (kvm_run->immediate_exit) {
7884 r = -EINTR;
7885 goto out;
7886 }
7887 kvm_vcpu_block(vcpu);
7888 kvm_apic_accept_events(vcpu);
7889 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
7890 r = -EAGAIN;
7891 if (signal_pending(current)) {
7892 r = -EINTR;
7893 vcpu->run->exit_reason = KVM_EXIT_INTR;
7894 ++vcpu->stat.signal_exits;
7895 }
7896 goto out;
7897 }
7898
7899 if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
7900 r = -EINVAL;
7901 goto out;
7902 }
7903
7904 if (vcpu->run->kvm_dirty_regs) {
7905 r = sync_regs(vcpu);
7906 if (r != 0)
7907 goto out;
7908 }
7909
	/* re-sync apic's tpr */
7911 if (!lapic_in_kernel(vcpu)) {
7912 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
7913 r = -EINVAL;
7914 goto out;
7915 }
7916 }
7917
7918 if (unlikely(vcpu->arch.complete_userspace_io)) {
7919 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
7920 vcpu->arch.complete_userspace_io = NULL;
7921 r = cui(vcpu);
7922 if (r <= 0)
7923 goto out;
7924 } else
7925 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7926
7927 if (kvm_run->immediate_exit)
7928 r = -EINTR;
7929 else
7930 r = vcpu_run(vcpu);
7931
7932out:
7933 kvm_put_guest_fpu(vcpu);
7934 if (vcpu->run->kvm_valid_regs)
7935 store_regs(vcpu);
7936 post_kvm_run_save(vcpu);
7937 kvm_sigset_deactivate(vcpu);
7938
7939 vcpu_put(vcpu);
7940 return r;
7941}
7942
7943static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7944{
7945 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * We are here if userspace calls get_regs() in the middle of
		 * instruction emulation.  Register state needs to be copied
		 * back from the emulation context to the vcpu.  Userspace
		 * shouldn't usually do that, but some badly designed PV
		 * devices (the vmware backdoor interface) need this to work.
		 */
7953 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
7954 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7955 }
7956 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
7957 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
7958 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
7959 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
7960 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
7961 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
7962 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
7963 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
7964#ifdef CONFIG_X86_64
7965 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
7966 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
7967 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
7968 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
7969 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
7970 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
7971 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
7972 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
7973#endif
7974
7975 regs->rip = kvm_rip_read(vcpu);
7976 regs->rflags = kvm_get_rflags(vcpu);
7977}
7978
7979int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7980{
7981 vcpu_load(vcpu);
7982 __get_regs(vcpu, regs);
7983 vcpu_put(vcpu);
7984 return 0;
7985}
7986
7987static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7988{
7989 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
7990 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7991
7992 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
7993 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
7994 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
7995 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
7996 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
7997 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
7998 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
7999 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
8000#ifdef CONFIG_X86_64
8001 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
8002 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
8003 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
8004 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
8005 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
8006 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
8007 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
8008 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
8009#endif
8010
8011 kvm_rip_write(vcpu, regs->rip);
8012 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
8013
8014 vcpu->arch.exception.pending = false;
8015
8016 kvm_make_request(KVM_REQ_EVENT, vcpu);
8017}
8018
8019int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8020{
8021 vcpu_load(vcpu);
8022 __set_regs(vcpu, regs);
8023 vcpu_put(vcpu);
8024 return 0;
8025}
8026
8027void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
8028{
8029 struct kvm_segment cs;
8030
8031 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
8032 *db = cs.db;
8033 *l = cs.l;
8034}
8035EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
8036
8037static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8038{
8039 struct desc_ptr dt;
8040
8041 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8042 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8043 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8044 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8045 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8046 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8047
8048 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8049 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8050
8051 kvm_x86_ops->get_idt(vcpu, &dt);
8052 sregs->idt.limit = dt.size;
8053 sregs->idt.base = dt.address;
8054 kvm_x86_ops->get_gdt(vcpu, &dt);
8055 sregs->gdt.limit = dt.size;
8056 sregs->gdt.base = dt.address;
8057
8058 sregs->cr0 = kvm_read_cr0(vcpu);
8059 sregs->cr2 = vcpu->arch.cr2;
8060 sregs->cr3 = kvm_read_cr3(vcpu);
8061 sregs->cr4 = kvm_read_cr4(vcpu);
8062 sregs->cr8 = kvm_get_cr8(vcpu);
8063 sregs->efer = vcpu->arch.efer;
8064 sregs->apic_base = kvm_get_apic_base(vcpu);
8065
8066 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
8067
8068 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
8069 set_bit(vcpu->arch.interrupt.nr,
8070 (unsigned long *)sregs->interrupt_bitmap);
8071}
8072
8073int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
8074 struct kvm_sregs *sregs)
8075{
8076 vcpu_load(vcpu);
8077 __get_sregs(vcpu, sregs);
8078 vcpu_put(vcpu);
8079 return 0;
8080}
8081
8082int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
8083 struct kvm_mp_state *mp_state)
8084{
8085 vcpu_load(vcpu);
8086
8087 kvm_apic_accept_events(vcpu);
8088 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
8089 vcpu->arch.pv.pv_unhalted)
8090 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
8091 else
8092 mp_state->mp_state = vcpu->arch.mp_state;
8093
8094 vcpu_put(vcpu);
8095 return 0;
8096}
8097
8098int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
8099 struct kvm_mp_state *mp_state)
8100{
8101 int ret = -EINVAL;
8102
8103 vcpu_load(vcpu);
8104
8105 if (!lapic_in_kernel(vcpu) &&
8106 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
8107 goto out;
8108
	/* INITs are latched while in SMM */
8110 if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
8111 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
8112 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
8113 goto out;
8114
8115 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
8116 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
8117 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
8118 } else
8119 vcpu->arch.mp_state = mp_state->mp_state;
8120 kvm_make_request(KVM_REQ_EVENT, vcpu);
8121
8122 ret = 0;
8123out:
8124 vcpu_put(vcpu);
8125 return ret;
8126}
8127
8128int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
8129 int reason, bool has_error_code, u32 error_code)
8130{
8131 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
8132 int ret;
8133
8134 init_emulate_ctxt(vcpu);
8135
8136 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
8137 has_error_code, error_code);
8138
8139 if (ret)
8140 return EMULATE_FAIL;
8141
8142 kvm_rip_write(vcpu, ctxt->eip);
8143 kvm_set_rflags(vcpu, ctxt->eflags);
8144 kvm_make_request(KVM_REQ_EVENT, vcpu);
8145 return EMULATE_DONE;
8146}
8147EXPORT_SYMBOL_GPL(kvm_task_switch);
8148
8149static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8150{
8151 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
8152 (sregs->cr4 & X86_CR4_OSXSAVE))
8153 return -EINVAL;
8154
8155 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
		/*
		 * When EFER.LME and CR0.PG are set, the processor is in
		 * 64-bit mode (though maybe in a 32-bit code segment).
		 * CR4.PAE and EFER.LMA must be set.
		 */
8161 if (!(sregs->cr4 & X86_CR4_PAE)
8162 || !(sregs->efer & EFER_LMA))
8163 return -EINVAL;
8164 } else {
		/*
		 * Not in 64-bit mode: EFER.LMA is clear and the code
		 * segment cannot be 64-bit.
		 */
8169 if (sregs->efer & EFER_LMA || sregs->cs.l)
8170 return -EINVAL;
8171 }
8172
8173 return 0;
8174}
8175
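/*
 * Apply userspace-supplied special registers.  Changes to CR0/CR3/CR4 or
 * EFER force an MMU context reset, and CR4 XSAVE/PKE changes trigger a
 * CPUID re-evaluation.
 */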
8176static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8177{
8178 struct msr_data apic_base_msr;
8179 int mmu_reset_needed = 0;
8180 int cpuid_update_needed = 0;
8181 int pending_vec, max_bits, idx;
8182 struct desc_ptr dt;
8183 int ret = -EINVAL;
8184
8185 if (kvm_valid_sregs(vcpu, sregs))
8186 goto out;
8187
8188 apic_base_msr.data = sregs->apic_base;
8189 apic_base_msr.host_initiated = true;
8190 if (kvm_set_apic_base(vcpu, &apic_base_msr))
8191 goto out;
8192
8193 dt.size = sregs->idt.limit;
8194 dt.address = sregs->idt.base;
8195 kvm_x86_ops->set_idt(vcpu, &dt);
8196 dt.size = sregs->gdt.limit;
8197 dt.address = sregs->gdt.base;
8198 kvm_x86_ops->set_gdt(vcpu, &dt);
8199
8200 vcpu->arch.cr2 = sregs->cr2;
8201 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
8202 vcpu->arch.cr3 = sregs->cr3;
8203 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
8204
8205 kvm_set_cr8(vcpu, sregs->cr8);
8206
8207 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
8208 kvm_x86_ops->set_efer(vcpu, sregs->efer);
8209
8210 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
8211 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
8212 vcpu->arch.cr0 = sregs->cr0;
8213
8214 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
8215 cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
8216 (X86_CR4_OSXSAVE | X86_CR4_PKE));
8217 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
8218 if (cpuid_update_needed)
8219 kvm_update_cpuid(vcpu);
8220
8221 idx = srcu_read_lock(&vcpu->kvm->srcu);
8222 if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
8223 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
8224 mmu_reset_needed = 1;
8225 }
8226 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8227
8228 if (mmu_reset_needed)
8229 kvm_mmu_reset_context(vcpu);
8230
8231 max_bits = KVM_NR_INTERRUPTS;
8232 pending_vec = find_first_bit(
8233 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
8234 if (pending_vec < max_bits) {
8235 kvm_queue_interrupt(vcpu, pending_vec, false);
8236 pr_debug("Set back pending irq %d\n", pending_vec);
8237 }
8238
8239 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8240 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8241 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8242 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8243 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8244 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8245
8246 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8247 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8248
8249 update_cr8_intercept(vcpu);
8250
	/* Older userspace won't unhalt the vcpu on reset. */
8252 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
8253 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
8254 !is_protmode(vcpu))
8255 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8256
8257 kvm_make_request(KVM_REQ_EVENT, vcpu);
8258
8259 ret = 0;
8260out:
8261 return ret;
8262}
8263
8264int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
8265 struct kvm_sregs *sregs)
8266{
8267 int ret;
8268
8269 vcpu_load(vcpu);
8270 ret = __set_sregs(vcpu, sregs);
8271 vcpu_put(vcpu);
8272 return ret;
8273}
8274
8275int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
8276 struct kvm_guest_debug *dbg)
8277{
8278 unsigned long rflags;
8279 int i, r;
8280
8281 vcpu_load(vcpu);
8282
8283 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
8284 r = -EBUSY;
8285 if (vcpu->arch.exception.pending)
8286 goto out;
8287 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
8288 kvm_queue_exception(vcpu, DB_VECTOR);
8289 else
8290 kvm_queue_exception(vcpu, BP_VECTOR);
8291 }
8292
	/*
	 * Read rflags while any previously injected trace flags are still
	 * filtered out.
	 */
8297 rflags = kvm_get_rflags(vcpu);
8298
8299 vcpu->guest_debug = dbg->control;
8300 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
8301 vcpu->guest_debug = 0;
8302
8303 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
8304 for (i = 0; i < KVM_NR_DB_REGS; ++i)
8305 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
8306 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
8307 } else {
8308 for (i = 0; i < KVM_NR_DB_REGS; i++)
8309 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
8310 }
8311 kvm_update_dr7(vcpu);
8312
8313 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
8314 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
8315 get_segment_base(vcpu, VCPU_SREG_CS);
8316
	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags.
	 */
8321 kvm_set_rflags(vcpu, rflags);
8322
8323 kvm_x86_ops->update_bp_intercept(vcpu);
8324
8325 r = 0;
8326
8327out:
8328 vcpu_put(vcpu);
8329 return r;
8330}
8331
/*
 * Translate a guest virtual address to a guest physical address.
 */
8335int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
8336 struct kvm_translation *tr)
8337{
8338 unsigned long vaddr = tr->linear_address;
8339 gpa_t gpa;
8340 int idx;
8341
8342 vcpu_load(vcpu);
8343
8344 idx = srcu_read_lock(&vcpu->kvm->srcu);
8345 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
8346 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8347 tr->physical_address = gpa;
8348 tr->valid = gpa != UNMAPPED_GVA;
8349 tr->writeable = 1;
8350 tr->usermode = 0;
8351
8352 vcpu_put(vcpu);
8353 return 0;
8354}
8355
8356int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
8357{
8358 struct fxregs_state *fxsave;
8359
8360 vcpu_load(vcpu);
8361
8362 fxsave = &vcpu->arch.guest_fpu.state.fxsave;
8363 memcpy(fpu->fpr, fxsave->st_space, 128);
8364 fpu->fcw = fxsave->cwd;
8365 fpu->fsw = fxsave->swd;
8366 fpu->ftwx = fxsave->twd;
8367 fpu->last_opcode = fxsave->fop;
8368 fpu->last_ip = fxsave->rip;
8369 fpu->last_dp = fxsave->rdp;
8370 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
8371
8372 vcpu_put(vcpu);
8373 return 0;
8374}
8375
8376int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
8377{
8378 struct fxregs_state *fxsave;
8379
8380 vcpu_load(vcpu);
8381
8382 fxsave = &vcpu->arch.guest_fpu.state.fxsave;
8383
8384 memcpy(fxsave->st_space, fpu->fpr, 128);
8385 fxsave->cwd = fpu->fcw;
8386 fxsave->swd = fpu->fsw;
8387 fxsave->twd = fpu->ftwx;
8388 fxsave->fop = fpu->last_opcode;
8389 fxsave->rip = fpu->last_ip;
8390 fxsave->rdp = fpu->last_dp;
8391 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
8392
8393 vcpu_put(vcpu);
8394 return 0;
8395}
8396
8397static void store_regs(struct kvm_vcpu *vcpu)
8398{
8399 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
8400
8401 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
8402 __get_regs(vcpu, &vcpu->run->s.regs.regs);
8403
8404 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
8405 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
8406
8407 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
8408 kvm_vcpu_ioctl_x86_get_vcpu_events(
8409 vcpu, &vcpu->run->s.regs.events);
8410}
8411
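/*
 * Copy the regs/sregs/events that userspace marked dirty in kvm_run back
 * into the vCPU and clear the corresponding kvm_dirty_regs bits.
 */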
8412static int sync_regs(struct kvm_vcpu *vcpu)
8413{
8414 if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
8415 return -EINVAL;
8416
8417 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
8418 __set_regs(vcpu, &vcpu->run->s.regs.regs);
8419 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
8420 }
8421 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
8422 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
8423 return -EINVAL;
8424 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
8425 }
8426 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
8427 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
8428 vcpu, &vcpu->run->s.regs.events))
8429 return -EINVAL;
8430 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
8431 }
8432
8433 return 0;
8434}
8435
8436static void fx_init(struct kvm_vcpu *vcpu)
8437{
8438 fpstate_init(&vcpu->arch.guest_fpu.state);
8439 if (boot_cpu_has(X86_FEATURE_XSAVES))
8440 vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
8441 host_xcr0 | XSTATE_COMPACTION_ENABLED;
8442
	/*
	 * Ensure guest xcr0 is valid for loading.
	 */
8446 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
8447
8448 vcpu->arch.cr0 |= X86_CR0_ET;
8449}
8450
8451void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
8452{
8453 void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
8454
8455 kvmclock_reset(vcpu);
8456
8457 kvm_x86_ops->vcpu_free(vcpu);
8458 free_cpumask_var(wbinvd_dirty_mask);
8459}
8460
8461struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
8462 unsigned int id)
8463{
8464 struct kvm_vcpu *vcpu;
8465
8466 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
8467 printk_once(KERN_WARNING
8468 "kvm: SMP vm created on host with unstable TSC; "
8469 "guest TSC will not be reliable\n");
8470
8471 vcpu = kvm_x86_ops->vcpu_create(kvm, id);
8472
8473 return vcpu;
8474}
8475
8476int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
8477{
8478 kvm_vcpu_mtrr_init(vcpu);
8479 vcpu_load(vcpu);
8480 kvm_vcpu_reset(vcpu, false);
8481 kvm_mmu_setup(vcpu);
8482 vcpu_put(vcpu);
8483 return 0;
8484}
8485
8486void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
8487{
8488 struct msr_data msr;
8489 struct kvm *kvm = vcpu->kvm;
8490
8491 kvm_hv_vcpu_postcreate(vcpu);
8492
8493 if (mutex_lock_killable(&vcpu->mutex))
8494 return;
8495 vcpu_load(vcpu);
8496 msr.data = 0x0;
8497 msr.index = MSR_IA32_TSC;
8498 msr.host_initiated = true;
8499 kvm_write_tsc(vcpu, &msr);
8500 vcpu_put(vcpu);
8501 mutex_unlock(&vcpu->mutex);
8502
8503 if (!kvmclock_periodic_sync)
8504 return;
8505
8506 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
8507 KVMCLOCK_SYNC_PERIOD);
8508}
8509
8510void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
8511{
8512 vcpu->arch.apf.msr_val = 0;
8513
8514 vcpu_load(vcpu);
8515 kvm_mmu_unload(vcpu);
8516 vcpu_put(vcpu);
8517
8518 kvm_x86_ops->vcpu_free(vcpu);
8519}
8520
8521void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
8522{
8523 kvm_lapic_reset(vcpu, init_event);
8524
8525 vcpu->arch.hflags = 0;
8526
8527 vcpu->arch.smi_pending = 0;
8528 vcpu->arch.smi_count = 0;
8529 atomic_set(&vcpu->arch.nmi_queued, 0);
8530 vcpu->arch.nmi_pending = 0;
8531 vcpu->arch.nmi_injected = false;
8532 kvm_clear_interrupt_queue(vcpu);
8533 kvm_clear_exception_queue(vcpu);
8534 vcpu->arch.exception.pending = false;
8535
8536 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
8537 kvm_update_dr0123(vcpu);
8538 vcpu->arch.dr6 = DR6_INIT;
8539 kvm_update_dr6(vcpu);
8540 vcpu->arch.dr7 = DR7_FIXED_1;
8541 kvm_update_dr7(vcpu);
8542
8543 vcpu->arch.cr2 = 0;
8544
8545 kvm_make_request(KVM_REQ_EVENT, vcpu);
8546 vcpu->arch.apf.msr_val = 0;
8547 vcpu->arch.st.msr_val = 0;
8548
8549 kvmclock_reset(vcpu);
8550
8551 kvm_clear_async_pf_completion_queue(vcpu);
8552 kvm_async_pf_hash_reset(vcpu);
8553 vcpu->arch.apf.halted = false;
8554
8555 if (kvm_mpx_supported()) {
8556 void *mpx_state_buffer;
8557
		/*
		 * If this is an INIT the guest FPU may be loaded; put it
		 * first so the memset below hits the saved xsave image, and
		 * reload it afterwards.
		 */
8562 if (init_event)
8563 kvm_put_guest_fpu(vcpu);
8564 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
8565 XFEATURE_MASK_BNDREGS);
8566 if (mpx_state_buffer)
8567 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
8568 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
8569 XFEATURE_MASK_BNDCSR);
8570 if (mpx_state_buffer)
8571 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
8572 if (init_event)
8573 kvm_load_guest_fpu(vcpu);
8574 }
8575
8576 if (!init_event) {
8577 kvm_pmu_reset(vcpu);
8578 vcpu->arch.smbase = 0x30000;
8579
8580 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
8581 vcpu->arch.msr_misc_features_enables = 0;
8582
8583 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
8584 }
8585
8586 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
8587 vcpu->arch.regs_avail = ~0;
8588 vcpu->arch.regs_dirty = ~0;
8589
8590 vcpu->arch.ia32_xss = 0;
8591
8592 kvm_x86_ops->vcpu_reset(vcpu, init_event);
8593}
8594
8595void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
8596{
8597 struct kvm_segment cs;
8598
8599 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
8600 cs.selector = vector << 8;
8601 cs.base = vector << 12;
8602 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
8603 kvm_rip_write(vcpu, 0);
8604}
8605
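/*
 * Per-CPU hardware virtualization enable (CPU hotplug/resume path).
 * Re-registers shared MSRs and, on hosts with a stable TSC, checks
 * whether the TSC went backwards across a suspend so it can be
 * compensated below.
 */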
8606int kvm_arch_hardware_enable(void)
8607{
8608 struct kvm *kvm;
8609 struct kvm_vcpu *vcpu;
8610 int i;
8611 int ret;
8612 u64 local_tsc;
8613 u64 max_tsc = 0;
8614 bool stable, backwards_tsc = false;
8615
8616 kvm_shared_msr_cpu_online();
8617 ret = kvm_x86_ops->hardware_enable();
8618 if (ret != 0)
8619 return ret;
8620
8621 local_tsc = rdtsc();
8622 stable = !kvm_check_tsc_unstable();
8623 list_for_each_entry(kvm, &vm_list, vm_list) {
8624 kvm_for_each_vcpu(i, vcpu, kvm) {
8625 if (!stable && vcpu->cpu == smp_processor_id())
8626 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8627 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
8628 backwards_tsc = true;
8629 if (vcpu->arch.last_host_tsc > max_tsc)
8630 max_tsc = vcpu->arch.last_host_tsc;
8631 }
8632 }
8633 }
8634
	/*
	 * Sometimes, even reliable TSCs go backwards.  This happens on
	 * platforms that reset the TSC during suspend or hibernate actions,
	 * but maintain synchronization.  We must compensate.  Fortunately,
	 * we can detect that case: the maximum TSC ever observed by a vCPU
	 * (last_host_tsc) is ahead of the TSC we just read, so adjust every
	 * vCPU by that delta.
	 *
	 * Note that this only runs on hosts whose TSC is otherwise stable
	 * (backwards_tsc is never set above for unstable TSCs), that the
	 * same delta is applied to all vCPUs of all VMs, and that the
	 * adjustment happens only once because last_host_tsc is reset below.
	 * Hosts with an unstable TSC are instead handled by the catchup
	 * logic when the vCPU is next loaded.
	 */
8673 if (backwards_tsc) {
8674 u64 delta_cyc = max_tsc - local_tsc;
8675 list_for_each_entry(kvm, &vm_list, vm_list) {
8676 kvm->arch.backwards_tsc_observed = true;
8677 kvm_for_each_vcpu(i, vcpu, kvm) {
8678 vcpu->arch.tsc_offset_adjustment += delta_cyc;
8679 vcpu->arch.last_host_tsc = local_tsc;
8680 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
8681 }
8682
			/*
			 * Also forget the last guest TSC write so that the
			 * next write is not matched against stale,
			 * pre-suspend values.
			 */
8689 kvm->arch.last_tsc_nsec = 0;
8690 kvm->arch.last_tsc_write = 0;
8691 }
8692
8693 }
8694 return 0;
8695}
8696
8697void kvm_arch_hardware_disable(void)
8698{
8699 kvm_x86_ops->hardware_disable();
8700 drop_user_return_notifiers();
8701}
8702
8703int kvm_arch_hardware_setup(void)
8704{
8705 int r;
8706
8707 r = kvm_x86_ops->hardware_setup();
8708 if (r != 0)
8709 return r;
8710
8711 if (kvm_has_tsc_control) {
		/*
		 * Make sure userspace can only configure tsc_khz values that
		 * fit into a signed 32-bit integer: clamp the scaled maximum
		 * to 0x7fffffff.
		 */
8718 u64 max = min(0x7fffffffULL,
8719 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
8720 kvm_max_guest_tsc_khz = max;
8721
8722 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
8723 }
8724
8725 kvm_init_msr_list();
8726 return 0;
8727}
8728
8729void kvm_arch_hardware_unsetup(void)
8730{
8731 kvm_x86_ops->hardware_unsetup();
8732}
8733
8734void kvm_arch_check_processor_compat(void *rtn)
8735{
8736 kvm_x86_ops->check_processor_compatibility(rtn);
8737}
8738
8739bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
8740{
8741 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
8742}
8743EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
8744
8745bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
8746{
8747 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
8748}
8749
8750struct static_key kvm_no_apic_vcpu __read_mostly;
8751EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
8752
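/*
 * Allocate and initialize the architecture-specific per-vCPU state: the
 * PIO scratch page, MMU, in-kernel LAPIC (if any), MCE banks and the
 * WBINVD dirty mask, unwinding in reverse order on failure.
 */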
8753int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
8754{
8755 struct page *page;
8756 int r;
8757
8758 vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
8759 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
8760 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
8761 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8762 else
8763 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
8764
8765 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
8766 if (!page) {
8767 r = -ENOMEM;
8768 goto fail;
8769 }
8770 vcpu->arch.pio_data = page_address(page);
8771
8772 kvm_set_tsc_khz(vcpu, max_tsc_khz);
8773
8774 r = kvm_mmu_create(vcpu);
8775 if (r < 0)
8776 goto fail_free_pio_data;
8777
8778 if (irqchip_in_kernel(vcpu->kvm)) {
8779 r = kvm_create_lapic(vcpu);
8780 if (r < 0)
8781 goto fail_mmu_destroy;
8782 } else
8783 static_key_slow_inc(&kvm_no_apic_vcpu);
8784
8785 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
8786 GFP_KERNEL);
8787 if (!vcpu->arch.mce_banks) {
8788 r = -ENOMEM;
8789 goto fail_free_lapic;
8790 }
8791 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
8792
8793 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
8794 r = -ENOMEM;
8795 goto fail_free_mce_banks;
8796 }
8797
8798 fx_init(vcpu);
8799
8800 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
8801
8802 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
8803
8804 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
8805
8806 kvm_async_pf_hash_reset(vcpu);
8807 kvm_pmu_init(vcpu);
8808
8809 vcpu->arch.pending_external_vector = -1;
8810 vcpu->arch.preempted_in_kernel = false;
8811
8812 kvm_hv_vcpu_init(vcpu);
8813
8814 return 0;
8815
8816fail_free_mce_banks:
8817 kfree(vcpu->arch.mce_banks);
8818fail_free_lapic:
8819 kvm_free_lapic(vcpu);
8820fail_mmu_destroy:
8821 kvm_mmu_destroy(vcpu);
8822fail_free_pio_data:
8823 free_page((unsigned long)vcpu->arch.pio_data);
8824fail:
8825 return r;
8826}
8827
8828void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
8829{
8830 int idx;
8831
8832 kvm_hv_vcpu_uninit(vcpu);
8833 kvm_pmu_destroy(vcpu);
8834 kfree(vcpu->arch.mce_banks);
8835 kvm_free_lapic(vcpu);
8836 idx = srcu_read_lock(&vcpu->kvm->srcu);
8837 kvm_mmu_destroy(vcpu);
8838 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8839 free_page((unsigned long)vcpu->arch.pio_data);
8840 if (!lapic_in_kernel(vcpu))
8841 static_key_slow_dec(&kvm_no_apic_vcpu);
8842}
8843
8844void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
8845{
8846 vcpu->arch.l1tf_flush_l1d = true;
8847 kvm_x86_ops->sched_in(vcpu, cpu);
8848}
8849
8850int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
8851{
8852 if (type)
8853 return -EINVAL;
8854
8855 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
8856 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
8857 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
8858 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
8859 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
8860
	/* Reserve bit 0 of irq_sources_bitmap for the userspace irq source */
8862 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
8863
8864 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
8865 &kvm->arch.irq_sources_bitmap);
8866
8867 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
8868 mutex_init(&kvm->arch.apic_map_lock);
8869 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
8870
8871 kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
8872 pvclock_update_vm_gtod_copy(kvm);
8873
8874 kvm->arch.guest_can_read_msr_platform_info = true;
8875
8876 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
8877 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
8878
8879 kvm_hv_init_vm(kvm);
8880 kvm_page_track_init(kvm);
8881 kvm_mmu_init_vm(kvm);
8882
8883 if (kvm_x86_ops->vm_init)
8884 return kvm_x86_ops->vm_init(kvm);
8885
8886 return 0;
8887}
8888
8889static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
8890{
8891 vcpu_load(vcpu);
8892 kvm_mmu_unload(vcpu);
8893 vcpu_put(vcpu);
8894}
8895
8896static void kvm_free_vcpus(struct kvm *kvm)
8897{
8898 unsigned int i;
8899 struct kvm_vcpu *vcpu;
8900
	/*
	 * Unpin any mmu pages first.
	 */
8904 kvm_for_each_vcpu(i, vcpu, kvm) {
8905 kvm_clear_async_pf_completion_queue(vcpu);
8906 kvm_unload_vcpu_mmu(vcpu);
8907 }
8908 kvm_for_each_vcpu(i, vcpu, kvm)
8909 kvm_arch_vcpu_free(vcpu);
8910
8911 mutex_lock(&kvm->lock);
8912 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
8913 kvm->vcpus[i] = NULL;
8914
8915 atomic_set(&kvm->online_vcpus, 0);
8916 mutex_unlock(&kvm->lock);
8917}
8918
8919void kvm_arch_sync_events(struct kvm *kvm)
8920{
8921 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
8922 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
8923 kvm_free_pit(kvm);
8924}
8925
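/*
 * Install or remove one of KVM's internal ("private") memslots, such as
 * the APIC access page, the identity-map page table or the TSS.  A
 * non-zero size creates the slot backed by an anonymous host mapping;
 * size == 0 deletes the slot and unmaps the old backing.
 */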
8926int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
8927{
8928 int i, r;
8929 unsigned long hva;
8930 struct kvm_memslots *slots = kvm_memslots(kvm);
8931 struct kvm_memory_slot *slot, old;
8932
	/* Called with kvm->slots_lock held.  */
8934 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
8935 return -EINVAL;
8936
8937 slot = id_to_memslot(slots, id);
8938 if (size) {
8939 if (slot->npages)
8940 return -EEXIST;
8941
		/*
		 * MAP_SHARED to prevent internal slot pages from being moved
		 * by fork()/COW.
		 */
8946 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
8947 MAP_SHARED | MAP_ANONYMOUS, 0);
8948 if (IS_ERR((void *)hva))
8949 return PTR_ERR((void *)hva);
8950 } else {
8951 if (!slot->npages)
8952 return 0;
8953
8954 hva = 0;
8955 }
8956
8957 old = *slot;
8958 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
8959 struct kvm_userspace_memory_region m;
8960
8961 m.slot = id | (i << 16);
8962 m.flags = 0;
8963 m.guest_phys_addr = gpa;
8964 m.userspace_addr = hva;
8965 m.memory_size = size;
8966 r = __kvm_set_memory_region(kvm, &m);
8967 if (r < 0)
8968 return r;
8969 }
8970
8971 if (!size)
8972 vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
8973
8974 return 0;
8975}
8976EXPORT_SYMBOL_GPL(__x86_set_memory_region);
8977
8978int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
8979{
8980 int r;
8981
8982 mutex_lock(&kvm->slots_lock);
8983 r = __x86_set_memory_region(kvm, id, gpa, size);
8984 mutex_unlock(&kvm->slots_lock);
8985
8986 return r;
8987}
8988EXPORT_SYMBOL_GPL(x86_set_memory_region);
8989
8990void kvm_arch_destroy_vm(struct kvm *kvm)
8991{
8992 if (current->mm == kvm->mm) {
		/*
		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
		 * or fd copying.
		 */
8998 x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
8999 x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
9000 x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
9001 }
9002 if (kvm_x86_ops->vm_destroy)
9003 kvm_x86_ops->vm_destroy(kvm);
9004 kvm_pic_destroy(kvm);
9005 kvm_ioapic_destroy(kvm);
9006 kvm_free_vcpus(kvm);
9007 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
9008 kvm_mmu_uninit_vm(kvm);
9009 kvm_page_track_cleanup(kvm);
9010 kvm_hv_destroy_vm(kvm);
9011}
9012
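/*
 * Free the arch-specific parts of a memslot (per-level rmap and large-page
 * info arrays), except for anything still shared with "dont", the slot
 * being kept across a move or flags-only change.
 */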
9013void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
9014 struct kvm_memory_slot *dont)
9015{
9016 int i;
9017
9018 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
9019 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
9020 kvfree(free->arch.rmap[i]);
9021 free->arch.rmap[i] = NULL;
9022 }
9023 if (i == 0)
9024 continue;
9025
9026 if (!dont || free->arch.lpage_info[i - 1] !=
9027 dont->arch.lpage_info[i - 1]) {
9028 kvfree(free->arch.lpage_info[i - 1]);
9029 free->arch.lpage_info[i - 1] = NULL;
9030 }
9031 }
9032
9033 kvm_page_track_free_memslot(free, dont);
9034}
9035
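/*
 * Allocate the arch-specific parts of a new memslot: an rmap array for
 * every supported page size and, for the large-page levels, lpage_info
 * arrays used to forbid large mappings at unaligned slot edges or when
 * large pages are disabled.
 */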
9036int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
9037 unsigned long npages)
9038{
9039 int i;
9040
9041 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
9042 struct kvm_lpage_info *linfo;
9043 unsigned long ugfn;
9044 int lpages;
9045 int level = i + 1;
9046
9047 lpages = gfn_to_index(slot->base_gfn + npages - 1,
9048 slot->base_gfn, level) + 1;
9049
9050 slot->arch.rmap[i] =
9051 kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
9052 GFP_KERNEL);
9053 if (!slot->arch.rmap[i])
9054 goto out_free;
9055 if (i == 0)
9056 continue;
9057
9058 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
9059 if (!linfo)
9060 goto out_free;
9061
9062 slot->arch.lpage_info[i - 1] = linfo;
9063
9064 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
9065 linfo[0].disallow_lpage = 1;
9066 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
9067 linfo[lpages - 1].disallow_lpage = 1;
9068 ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot.
		 */
9074 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
9075 !kvm_largepages_enabled()) {
9076 unsigned long j;
9077
9078 for (j = 0; j < lpages; ++j)
9079 linfo[j].disallow_lpage = 1;
9080 }
9081 }
9082
9083 if (kvm_page_track_create_memslot(slot, npages))
9084 goto out_free;
9085
9086 return 0;
9087
9088out_free:
9089 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
9090 kvfree(slot->arch.rmap[i]);
9091 slot->arch.rmap[i] = NULL;
9092 if (i == 0)
9093 continue;
9094
9095 kvfree(slot->arch.lpage_info[i - 1]);
9096 slot->arch.lpage_info[i - 1] = NULL;
9097 }
9098 return -ENOMEM;
9099}
9100
9101void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
9102{
	/*
	 * memslots->generation has been incremented.
	 * mmio generation may have reached its maximum value.
	 */
9107 kvm_mmu_invalidate_mmio_sptes(kvm, slots);
9108}
9109
9110int kvm_arch_prepare_memory_region(struct kvm *kvm,
9111 struct kvm_memory_slot *memslot,
9112 const struct kvm_userspace_memory_region *mem,
9113 enum kvm_mr_change change)
9114{
9115 return 0;
9116}
9117
9118static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
9119 struct kvm_memory_slot *new)
9120{
	/* Still write-protect a read-only slot. */
9122 if (new->flags & KVM_MEM_READONLY) {
9123 kvm_mmu_slot_remove_write_access(kvm, new);
9124 return;
9125 }
9126
	/*
	 * Call the kvm_x86_ops dirty logging hooks when they are valid.
	 *
	 * kvm_x86_ops->slot_disable_log_dirty is called when:
	 *
	 *  - KVM_MR_CREATE with dirty logging is disabled
	 *  - KVM_MR_FLAGS_ONLY with dirty logging disabled in the new flags
	 *
	 * The reason is that, in case of PML, we need to set the D-bit for
	 * any slot with dirty logging disabled in order to eliminate
	 * unnecessary GPA logging in the PML buffer (and potential
	 * PML-buffer-full VMEXITs).  This guarantees that leaving PML
	 * enabled for the guest's lifetime adds no overhead while the guest
	 * runs with dirty logging disabled for its memory slots.
	 *
	 * kvm_x86_ops->slot_enable_log_dirty is called when switching a
	 * slot to dirty logging mode.
	 *
	 * If the kvm_x86_ops dirty logging hooks are invalid, fall back to
	 * write protection: write protect all pages for dirty logging.
	 *
	 * In that case all sptes, including the large sptes which point to
	 * this slot, are set to read-only.  We can not create any new large
	 * spte on this slot until the end of the logging.
	 *
	 * See the comments in fast_page_fault().
	 */
9157 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
9158 if (kvm_x86_ops->slot_enable_log_dirty)
9159 kvm_x86_ops->slot_enable_log_dirty(kvm, new);
9160 else
9161 kvm_mmu_slot_remove_write_access(kvm, new);
9162 } else {
9163 if (kvm_x86_ops->slot_disable_log_dirty)
9164 kvm_x86_ops->slot_disable_log_dirty(kvm, new);
9165 }
9166}
9167
9168void kvm_arch_commit_memory_region(struct kvm *kvm,
9169 const struct kvm_userspace_memory_region *mem,
9170 const struct kvm_memory_slot *old,
9171 const struct kvm_memory_slot *new,
9172 enum kvm_mr_change change)
9173{
9174 int nr_mmu_pages = 0;
9175
9176 if (!kvm->arch.n_requested_mmu_pages)
9177 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
9178
9179 if (nr_mmu_pages)
9180 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
9181
	/*
	 * Dirty logging tracks sptes in 4k granularity, meaning that large
	 * sptes have to be split.  If live migration succeeds, the guest in
	 * the source machine will be destroyed and large sptes will be
	 * created in the destination.  However, if the guest continues to
	 * run in the source machine (for example if live migration fails),
	 * small sptes will remain around and cause bad performance.
	 *
	 * Scan the sptes if dirty logging has been stopped, dropping those
	 * which can be collapsed into a single large-page spte.  Later
	 * page faults will create the large-page sptes.
	 */
9194 if ((change != KVM_MR_DELETE) &&
9195 (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
9196 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
9197 kvm_mmu_zap_collapsible_sptes(kvm, new);
9198
	/*
	 * Set up write protection and/or dirty logging for the new slot.
	 *
	 * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old
	 * slot have been zapped, so no dirty logging work is needed for it.
	 * For KVM_MR_FLAGS_ONLY, the old slot is essentially the same one
	 * as the new one and is covered when dealing with the new slot.
	 *
	 * FIXME: const-ify all uses of struct kvm_memory_slot.
	 */
9209 if (change != KVM_MR_DELETE)
9210 kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
9211}
9212
9213void kvm_arch_flush_shadow_all(struct kvm *kvm)
9214{
9215 kvm_mmu_invalidate_zap_all_pages(kvm);
9216}
9217
9218void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
9219 struct kvm_memory_slot *slot)
9220{
9221 kvm_page_track_flush_slot(kvm, slot);
9222}
9223
9224static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
9225{
9226 return (is_guest_mode(vcpu) &&
9227 kvm_x86_ops->guest_apic_has_interrupt &&
9228 kvm_x86_ops->guest_apic_has_interrupt(vcpu));
9229}
9230
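/*
 * Events that make a halted or blocked vCPU runnable again: completed
 * async page faults, pending APIC events, a PV kick (pv_unhalted), pending
 * exceptions, and deliverable NMIs, SMIs, interrupts or Hyper-V synthetic
 * timers.
 */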
9231static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
9232{
9233 if (!list_empty_careful(&vcpu->async_pf.done))
9234 return true;
9235
9236 if (kvm_apic_has_events(vcpu))
9237 return true;
9238
9239 if (vcpu->arch.pv.pv_unhalted)
9240 return true;
9241
9242 if (vcpu->arch.exception.pending)
9243 return true;
9244
9245 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
9246 (vcpu->arch.nmi_pending &&
9247 kvm_x86_ops->nmi_allowed(vcpu)))
9248 return true;
9249
9250 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
9251 (vcpu->arch.smi_pending && !is_smm(vcpu)))
9252 return true;
9253
9254 if (kvm_arch_interrupt_allowed(vcpu) &&
9255 (kvm_cpu_has_interrupt(vcpu) ||
9256 kvm_guest_apic_has_interrupt(vcpu)))
9257 return true;
9258
9259 if (kvm_hv_has_stimer_pending(vcpu))
9260 return true;
9261
9262 return false;
9263}
9264
9265int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
9266{
9267 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
9268}
9269
9270bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
9271{
9272 return vcpu->arch.preempted_in_kernel;
9273}
9274
9275int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
9276{
9277 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
9278}
9279
9280int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
9281{
9282 return kvm_x86_ops->interrupt_allowed(vcpu);
9283}
9284
9285unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
9286{
9287 if (is_64_bit_mode(vcpu))
9288 return kvm_rip_read(vcpu);
9289 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
9290 kvm_rip_read(vcpu));
9291}
9292EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
9293
9294bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
9295{
9296 return kvm_get_linear_rip(vcpu) == linear_rip;
9297}
9298EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
9299
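/*
 * RFLAGS accessors: hide from the guest (and re-apply on writes) the TF
 * bit that KVM sets internally while single-stepping the guest on behalf
 * of userspace (KVM_GUESTDBG_SINGLESTEP).
 */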
9300unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
9301{
9302 unsigned long rflags;
9303
9304 rflags = kvm_x86_ops->get_rflags(vcpu);
9305 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
9306 rflags &= ~X86_EFLAGS_TF;
9307 return rflags;
9308}
9309EXPORT_SYMBOL_GPL(kvm_get_rflags);
9310
9311static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
9312{
9313 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
9314 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
9315 rflags |= X86_EFLAGS_TF;
9316 kvm_x86_ops->set_rflags(vcpu, rflags);
9317}
9318
9319void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
9320{
9321 __kvm_set_rflags(vcpu, rflags);
9322 kvm_make_request(KVM_REQ_EVENT, vcpu);
9323}
9324EXPORT_SYMBOL_GPL(kvm_set_rflags);
9325
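/*
 * The page backing an async fault is ready: if the vCPU is still using the
 * same MMU context, retry the access through the MMU so the translation is
 * prefaulted before the guest resumes.
 */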
9326void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
9327{
9328 int r;
9329
9330 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
9331 work->wakeup_all)
9332 return;
9333
9334 r = kvm_mmu_reload(vcpu);
9335 if (unlikely(r))
9336 return;
9337
9338 if (!vcpu->arch.mmu.direct_map &&
9339 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
9340 return;
9341
9342 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
9343}
9344
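/*
 * vcpu->arch.apf.gfns[] is a small open-addressed hash table (linear
 * probing) of guest frame numbers with an async page fault outstanding;
 * ~0 marks an empty slot.  The helpers below add, find and delete entries.
 */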
9345static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
9346{
9347 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
9348}
9349
9350static inline u32 kvm_async_pf_next_probe(u32 key)
9351{
9352 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
9353}
9354
9355static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
9356{
9357 u32 key = kvm_async_pf_hash_fn(gfn);
9358
9359 while (vcpu->arch.apf.gfns[key] != ~0)
9360 key = kvm_async_pf_next_probe(key);
9361
9362 vcpu->arch.apf.gfns[key] = gfn;
9363}
9364
9365static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
9366{
9367 int i;
9368 u32 key = kvm_async_pf_hash_fn(gfn);
9369
9370 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
9371 (vcpu->arch.apf.gfns[key] != gfn &&
9372 vcpu->arch.apf.gfns[key] != ~0); i++)
9373 key = kvm_async_pf_next_probe(key);
9374
9375 return key;
9376}
9377
9378bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
9379{
9380 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
9381}
9382
9383static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
9384{
9385 u32 i, j, k;
9386
9387 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
9388 while (true) {
9389 vcpu->arch.apf.gfns[i] = ~0;
9390 do {
9391 j = kvm_async_pf_next_probe(j);
9392 if (vcpu->arch.apf.gfns[j] == ~0)
9393 return;
9394 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k lies cyclically in ]i,j]
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 */
9400 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
9401 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
9402 i = j;
9403 }
9404}
9405
9406static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
9407{
	/* Write the async page fault reason word into the guest-registered area. */
9409 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
9410 sizeof(val));
9411}
9412
9413static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
9414{
	/* Read back the guest's async page fault reason word. */
9416 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
9417 sizeof(u32));
9418}
9419
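/*
 * A guest page was not present and an async page fault has been queued:
 * either halt the vCPU (async PF not enabled by the guest, or the guest
 * only wants user-mode notifications while it is currently at CPL0), or
 * inject a #PF with reason KVM_PV_REASON_PAGE_NOT_PRESENT and the token
 * as the fault address.
 */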
9420void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
9421 struct kvm_async_pf *work)
9422{
9423 struct x86_exception fault;
9424
9425 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
9426 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
9427
9428 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
9429 (vcpu->arch.apf.send_user_only &&
9430 kvm_x86_ops->get_cpl(vcpu) == 0))
9431 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
9432 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
9433 fault.vector = PF_VECTOR;
9434 fault.error_code_valid = true;
9435 fault.error_code = 0;
9436 fault.nested_page_fault = false;
9437 fault.address = work->arch.token;
9438 fault.async_page_fault = true;
9439 kvm_inject_page_fault(vcpu, &fault);
9440 }
9441}
9442
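/*
 * The page behind an earlier async fault is now available: cancel a still
 * pending "not present" #PF if the guest has not seen it yet, otherwise
 * report KVM_PV_REASON_PAGE_READY (again as a #PF carrying the token),
 * then make the vCPU runnable.
 */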
9443void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
9444 struct kvm_async_pf *work)
9445{
9446 struct x86_exception fault;
9447 u32 val;
9448
9449 if (work->wakeup_all)
9450 work->arch.token = ~0;
9451 else
9452 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
9453 trace_kvm_async_pf_ready(work->arch.token, work->gva);
9454
9455 if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
9456 !apf_get_user(vcpu, &val)) {
9457 if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
9458 vcpu->arch.exception.pending &&
9459 vcpu->arch.exception.nr == PF_VECTOR &&
9460 !apf_put_user(vcpu, 0)) {
9461 vcpu->arch.exception.injected = false;
9462 vcpu->arch.exception.pending = false;
9463 vcpu->arch.exception.nr = 0;
9464 vcpu->arch.exception.has_error_code = false;
9465 vcpu->arch.exception.error_code = 0;
9466 } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
9467 fault.vector = PF_VECTOR;
9468 fault.error_code_valid = true;
9469 fault.error_code = 0;
9470 fault.nested_page_fault = false;
9471 fault.address = work->arch.token;
9472 fault.async_page_fault = true;
9473 kvm_inject_page_fault(vcpu, &fault);
9474 }
9475 }
9476 vcpu->arch.apf.halted = false;
9477 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
9478}
9479
9480bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
9481{
9482 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
9483 return true;
9484 else
9485 return kvm_can_do_async_pf(vcpu);
9486}
9487
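/*
 * Bookkeeping for assigned (pass-through) devices and devices doing
 * non-coherent DMA; other parts of KVM query these counts, for example
 * when deciding whether to honour guest memory types or to post device
 * interrupts directly to the guest.
 */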
9488void kvm_arch_start_assignment(struct kvm *kvm)
9489{
9490 atomic_inc(&kvm->arch.assigned_device_count);
9491}
9492EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
9493
9494void kvm_arch_end_assignment(struct kvm *kvm)
9495{
9496 atomic_dec(&kvm->arch.assigned_device_count);
9497}
9498EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
9499
9500bool kvm_arch_has_assigned_device(struct kvm *kvm)
9501{
9502 return atomic_read(&kvm->arch.assigned_device_count);
9503}
9504EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
9505
9506void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
9507{
9508 atomic_inc(&kvm->arch.noncoherent_dma_count);
9509}
9510EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
9511
9512void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
9513{
9514 atomic_dec(&kvm->arch.noncoherent_dma_count);
9515}
9516EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
9517
9518bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
9519{
9520 return atomic_read(&kvm->arch.noncoherent_dma_count);
9521}
9522EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
9523
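/*
 * IRQ bypass glue for posted interrupts: available only when the vendor
 * module implements update_pi_irte, which reprograms the interrupt
 * remapping entry to deliver a device interrupt directly to the guest or
 * to fall back to ordinary remapped delivery.
 */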
9524bool kvm_arch_has_irq_bypass(void)
9525{
9526 return kvm_x86_ops->update_pi_irte != NULL;
9527}
9528
9529int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
9530 struct irq_bypass_producer *prod)
9531{
9532 struct kvm_kernel_irqfd *irqfd =
9533 container_of(cons, struct kvm_kernel_irqfd, consumer);
9534
9535 irqfd->producer = prod;
9536
9537 return kvm_x86_ops->update_pi_irte(irqfd->kvm,
9538 prod->irq, irqfd->gsi, 1);
9539}
9540
9541void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
9542 struct irq_bypass_producer *prod)
9543{
9544 int ret;
9545 struct kvm_kernel_irqfd *irqfd =
9546 container_of(cons, struct kvm_kernel_irqfd, consumer);
9547
9548 WARN_ON(irqfd->producer != prod);
9549 irqfd->producer = NULL;
9550
	/*
	 * When the producer of a consumer is unregistered, switch back to
	 * remapped mode, so the current implementation can be reused when
	 * the irq is masked/disabled or the consumer side (KVM in this
	 * case) does not want to receive the interrupts.
	 */
9557 ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
9558 if (ret)
		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
		       " failed: %d\n", irqfd->consumer.token, ret);
9561}
9562
9563int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
9564 uint32_t guest_irq, bool set)
9565{
9566 if (!kvm_x86_ops->update_pi_irte)
9567 return -EINVAL;
9568
9569 return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
9570}
9571
9572bool kvm_vector_hashing_enabled(void)
9573{
9574 return vector_hashing;
9575}
9576EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
9577
9578EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
9579EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
9580EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
9581EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
9582EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
9583EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
9584EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
9585EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
9586EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
9587EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
9588EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
9589EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
9590EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
9591EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
9592EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
9593EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
9594EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
9595EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
9596EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
9597