/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * x86-specific code shared by the VMX and SVM backends: MSR emulation,
 * control and debug register handling, exception queueing, kvmclock and
 * TSC management, and the x86 side of the KVM ioctl interface.
 */

#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/* EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

static bool ignore_msrs;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);

/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

#define KVM_NR_SHARED_MSRS 16

struct kvm_shared_msrs_global {
	int nr;
	u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_shared_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;
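
/*
 * Summary of how the shared-MSR machinery above is used: some MSRs
 * (e.g. the SYSCALL MSRs) hold guest values while a vCPU runs but must
 * hold host values once the CPU returns to host userspace.  Restoring
 * them on every vmexit would be wasteful, so kvm_set_shared_msr()
 * below writes the guest value and registers a user-return notifier;
 * kvm_on_user_return() then restores the host value lazily, only when
 * the CPU really does return to userspace.
 */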

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "efer_reload", VCPU_STAT(efer_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ NULL }
};

u64 __read_mostly host_xcr0;

static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
	int i;
	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
		vcpu->arch.apf.gfns[i] = ~0;
}

static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
	struct kvm_shared_msrs *locals
		= container_of(urn, struct kvm_shared_msrs, urn);
	struct kvm_shared_msr_values *values;

	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
		values = &locals->values[slot];
		if (values->host != values->curr) {
			wrmsrl(shared_msrs_global.msrs[slot], values->host);
			values->curr = values->host;
		}
	}
	locals->registered = false;
	user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
{
	u64 value;
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	/* only read, but nobody should modify it at this time,
	 * so don't need lock */
	if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!");
		return;
	}
	rdmsrl_safe(msr, &value);
	smsr->values[slot].host = value;
	smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
	shared_msrs_global.msrs[slot] = msr;
	/* we need ensured the shared_msr_global have been updated */
	smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
	unsigned i;

	for (i = 0; i < shared_msrs_global.nr; ++i)
		shared_msr_update(i, shared_msrs_global.msrs[i]);
}

void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (((value ^ smsr->values[slot].curr) & mask) == 0)
		return;
	smsr->values[slot].curr = value;
	wrmsrl(shared_msrs_global.msrs[slot], value);
	if (!smsr->registered) {
		smsr->urn.on_user_return = kvm_on_user_return;
		user_return_notifier_register(&smsr->urn);
		smsr->registered = true;
	}
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void *ignore)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
	/* TODO: reserve bits check */
	kvm_lapic_set_base(vcpu, data);
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

asmlinkage void kvm_spurious_fault(void)
{
	/* Fault while not rebooting.  We want the trace. */
	BUG();
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);

#define EXCPT_BENIGN		0
#define EXCPT_CONTRIBUTORY	1
#define EXCPT_PF		2

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending) {
	queue:
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.reinject = reinject;
		return;
	}

	/* to check for double fault */
	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* generate double fault per SDM Table 5-5 */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
	} else
		/* replace previous exception with a new one in a hope
		   that instruction re-execution will regenerate lost
		   exception */
		goto queue;
}
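
/*
 * Worked example of the merging rules above (illustrative, derived
 * from the logic rather than quoted from the original source): if a
 * #PF is pending and its delivery raises #GP, both fall in the
 * contributory/#PF classes and are replaced by a single #DF with
 * error code 0.  If anything is raised while a #DF is already
 * pending, the vcpu goes to triple-fault shutdown instead.  A benign
 * exception (e.g. #DB) arriving on top of a pending one simply
 * replaces it via the queue: path.
 */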

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err)
		kvm_inject_gp(vcpu, 0);
	else
		kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.cr2 = fault->address;
	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
	else
		vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	atomic_inc(&vcpu->arch.nmi_queued);
	kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

/*
 * This function will be used to read from the physical memory of the currently
 * running guest. The difference to kvm_read_guest_page is that this function
 * can read from guest physical or from the guest's guest physical memory.
 */
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa     = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
			       void *data, int offset, int len, u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

/*
 * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK|PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if (is_present_gpte(pdpte[i]) &&
		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_dirty);
out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
	bool changed = true;
	int offset;
	gfn_t gfn;
	int r;

	if (is_long_mode(vcpu) || !is_pae(vcpu))
		return false;

	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		return true;

	gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
	offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
				       PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (r < 0)
		goto out;
	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:

	return changed;
}
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
				    X86_CR0_CD | X86_CR0_NW;

	cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return 1;
#endif

	cr0 &= ~CR0_RESERVED_BITS;

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
		return 1;

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
		return 1;

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu))
				return 1;
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l)
				return 1;
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
						 kvm_read_cr3(vcpu)))
			return 1;
	}

	if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
		return 1;

	kvm_x86_ops->set_cr0(vcpu, cr0);

	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
	}

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
			!vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
		vcpu->guest_xcr0_loaded = 1;
	}
}

static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (vcpu->guest_xcr0_loaded) {
		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
		vcpu->guest_xcr0_loaded = 0;
	}
}

int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	xcr0 = xcr;
	if (!(xcr0 & XSTATE_FP))
		return 1;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return 1;
	if (xcr0 & ~host_xcr0)
		return 1;
	kvm_put_guest_xcr0(vcpu);
	vcpu->arch.xcr0 = xcr0;
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
	    __kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
				   X86_CR4_PAE | X86_CR4_SMEP;
	if (cr4 & CR4_RESERVED_BITS)
		return 1;

	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
		return 1;

	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
		return 1;

	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
		   && ((cr4 ^ old_cr4) & pdptr_bits)
		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
				   kvm_read_cr3(vcpu)))
		return 1;

	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
		if (!guest_cpuid_has_pcid(vcpu))
			return 1;

		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
			return 1;
	}

	if (kvm_x86_ops->set_cr4(vcpu, cr4))
		return 1;

	if (((cr4 ^ old_cr4) & pdptr_bits) ||
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
		kvm_update_cpuid(vcpu);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
		kvm_mmu_sync_roots(vcpu);
		kvm_mmu_flush_tlb(vcpu);
		return 0;
	}

	if (is_long_mode(vcpu)) {
		if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
			if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
				return 1;
		} else
			if (cr3 & CR3_L_MODE_RESERVED_BITS)
				return 1;
	} else {
		if (is_pae(vcpu)) {
			if (cr3 & CR3_PAE_RESERVED_BITS)
				return 1;
			if (is_paging(vcpu) &&
			    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
				return 1;
		}
		/*
		 * We don't check reserved bits in nonpae mode, because
		 * this isn't enforced, and VMware depends on this.
		 */
	}

	/*
	 * Does the new cr3 value map to physical memory? (Note, we
	 * catch an invalid cr3 even in real-mode, because it would
	 * cause trouble later on when we turn on paging anyway.)
	 *
	 * A real CPU would silently accept an invalid cr3 and would
	 * attempt to use it - with largely undefined (and often hard
	 * to debug) behavior on the guest side.
	 */
	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
		return 1;
	vcpu->arch.cr3 = cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
	vcpu->arch.mmu.new_cr3(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS)
		return 1;
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
	unsigned long dr7;

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
		dr7 = vcpu->arch.guest_debug_dr7;
	else
		dr7 = vcpu->arch.dr7;
	kvm_x86_ops->set_dr7(vcpu, dr7);
	vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
}

static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	switch (dr) {
	case 0 ... 3:
		vcpu->arch.db[dr] = val;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = val;
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	case 6:
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	default: /* 7 */
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
		kvm_update_dr7(vcpu);
		break;
	}

	return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	int res;

	res = __kvm_set_dr(vcpu, dr, val);
	if (res > 0)
		kvm_queue_exception(vcpu, UD_VECTOR);
	else if (res < 0)
		kvm_inject_gp(vcpu, 0);

	return res;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	switch (dr) {
	case 0 ... 3:
		*val = vcpu->arch.db[dr];
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	case 6:
		*val = vcpu->arch.dr6;
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	default: /* 7 */
		*val = vcpu->arch.dr7;
		break;
	}

	return 0;
}

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	if (_kvm_get_dr(vcpu, dr, val)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

int kvm_rdpmc(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
	u64 data;
	int err;

	err = kvm_pmu_read_pmc(vcpu, ecx, &data);
	if (err)
		return err;
	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
	kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
	return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu. This capabilities test skips MSRs that are
 * kvm-specific. Those are put in the beginning of the list.
 */
#define KVM_SAVE_MSRS_BEGIN	10
static u32 msrs_to_save[] = {
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
	MSR_KVM_PV_EOI_EN,
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};

static unsigned num_msrs_to_save;

static const u32 emulated_msrs[] = {
	MSR_IA32_TSC_ADJUST,
	MSR_IA32_TSCDEADLINE,
	MSR_IA32_MISC_ENABLE,
	MSR_IA32_MCG_STATUS,
	MSR_IA32_MCG_CTL,
};

bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & efer_reserved_bits)
		return false;

	if (efer & EFER_FFXSR) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
			return false;
	}

	if (efer & EFER_SVME) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
			return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);

static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	u64 old_efer = vcpu->arch.efer;

	if (!kvm_valid_efer(vcpu, efer))
		return 1;

	if (is_paging(vcpu)
	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
		return 1;

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.efer & EFER_LMA;

	kvm_x86_ops->set_efer(vcpu, efer);

	/* Update reserved bits */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);

	return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	return kvm_x86_ops->set_msr(vcpu, msr);
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct msr_data msr;

	msr.data = *data;
	msr.index = index;
	msr.host_initiated = true;
	return kvm_set_msr(vcpu, &msr);
}

#ifdef CONFIG_X86_64
struct pvclock_gtod_data {
	seqcount_t	seq;

	struct { /* extract of a clocksource struct */
		int vclock_mode;
		cycle_t	cycle_last;
		cycle_t	mask;
		u32	mult;
		u32	shift;
	} clock;

	/* open coded 'struct timespec' */
	u64		monotonic_time_snsec;
	time_t		monotonic_time_sec;
};

static struct pvclock_gtod_data pvclock_gtod_data;

static void update_pvclock_gtod(struct timekeeper *tk)
{
	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;

	write_seqcount_begin(&vdata->seq);

	/* copy pvclock gtod data */
	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
	vdata->clock.cycle_last		= tk->clock->cycle_last;
	vdata->clock.mask		= tk->clock->mask;
	vdata->clock.mult		= tk->mult;
	vdata->clock.shift		= tk->shift;

	vdata->monotonic_time_sec	= tk->xtime_sec
					+ tk->wall_to_monotonic.tv_sec;
	vdata->monotonic_time_snsec	= tk->xtime_nsec
					+ (tk->wall_to_monotonic.tv_nsec
						<< tk->shift);
	while (vdata->monotonic_time_snsec >=
					(((u64)NSEC_PER_SEC) << tk->shift)) {
		vdata->monotonic_time_snsec -=
					((u64)NSEC_PER_SEC) << tk->shift;
		vdata->monotonic_time_sec++;
	}

	write_seqcount_end(&vdata->seq);
}
#endif

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec boot;

	if (!wall_clock)
		return;

	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
	if (r)
		return;

	if (version & 1)
		++version;  /* first time write, random junk */

	++version;

	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 */
	getboottime(&boot);

	if (kvm->arch.kvmclock_offset) {
		struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
		boot = timespec_sub(boot, ts);
	}
	wc.sec = boot.tv_sec;
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}
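
/*
 * The version dance above mirrors a seqlock: the version is made odd
 * before the payload is written and even afterwards.  A guest reading
 * the pvclock wall-clock data is expected to retry while the version
 * is odd or changes across the read, roughly (guest-side sketch,
 * illustrative and not code from this file):
 *
 *	do {
 *		v = wc->version;
 *		rmb();
 *		sec = wc->sec; nsec = wc->nsec;
 *		rmb();
 *	} while ((v & 1) || v != wc->version);
 */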

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	uint32_t quotient, remainder;

	/* Don't try to replace with do_div(), this one calculates
	 * "(dividend << 32) / divisor" */
	__asm__ ( "divl %4"
		  : "=a" (quotient), "=d" (remainder)
		  : "0" (0), "1" (dividend), "r" (divisor) );
	return quotient;
}
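
/*
 * Worked example: div_frac() returns a 0.32 fixed-point fraction, so
 * div_frac(3, 4) computes (3 << 32) / 4 = 0xC0000000, i.e. 0.75
 * scaled by 2^32.
 */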

static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
			       s8 *pshift, u32 *pmultiplier)
{
	uint64_t scaled64;
	int32_t  shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = base_khz * 1000LL;
	scaled64 = scaled_khz * 1000LL;
	while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
		if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
			scaled64 >>= 1;
		else
			tps32 <<= 1;
		shift++;
	}

	*pshift = shift;
	*pmultiplier = div_frac(scaled64, tps32);

	pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
}
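
/*
 * Worked example for kvm_get_time_scale() (illustrative numbers): with
 * base_khz = 2600000 (a 2.6 GHz TSC) and scaled_khz = 1000000 (the
 * "nanosecond clock"), the first loop halves tps64 once, giving
 * shift = -1, and the multiplier becomes div_frac(10^9, 1.3*10^9),
 * about 0xC4EC4EC5 (~0.769 * 2^32).  pvclock_scale_delta() then
 * computes ns = ((delta >> 1) * mult) >> 32, i.e. roughly
 * delta / 2.6: one TSC cycle accounts for ~0.3846 ns, as expected for
 * a 2.6 GHz clock.
 */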

static inline u64 get_kernel_ns(void)
{
	struct timespec ts;

	WARN_ON(preemptible());
	ktime_get_ts(&ts);
	monotonic_to_bootbased(&ts);
	return timespec_to_ns(&ts);
}

#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;

static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
				   vcpu->arch.virtual_tsc_shift);
}

static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
	u64 v = (u64)khz * (1000000 + ppm);
	do_div(v, 1000000);
	return v;
}
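
/*
 * Example: adjust_tsc_khz() applies a parts-per-million correction, so
 * with khz = 1000000 and the default tsc_tolerance_ppm of 250, the
 * acceptable range computed in kvm_set_tsc_khz() below is
 * [999750, 1000250] kHz; requested rates outside it enable TSC
 * scaling or catchup.
 */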

static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
	u32 thresh_lo, thresh_hi;
	int use_scaling = 0;

	/* tsc_khz can be zero if TSC calibration fails */
	if (this_tsc_khz == 0)
		return;

	/* Compute a scale to convert nanoseconds in TSC cycles */
	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
			   &vcpu->arch.virtual_tsc_shift,
			   &vcpu->arch.virtual_tsc_mult);
	vcpu->arch.virtual_tsc_khz = this_tsc_khz;

	/*
	 * Compute the variation in TSC rate which is acceptable
	 * within the range of tolerance and decide if the
	 * rate being applied is within that bounds of the hardware
	 * rate.  If so, no scaling or compensation need be done.
	 */
	thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
	thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
	if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
		pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
		use_scaling = 1;
	}
	kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
				      vcpu->arch.virtual_tsc_mult,
				      vcpu->arch.virtual_tsc_shift);
	tsc += vcpu->arch.this_tsc_write;
	return tsc;
}
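
/*
 * In other words, the guest TSC is reconstructed as
 *
 *	guest_tsc = this_tsc_write
 *		  + (kernel_ns - this_tsc_nsec) * virtual_tsc_khz / 10^6
 *
 * with the multiplication carried out by pvclock_scale_delta() using
 * the precomputed shift/multiplier pair: the last value the guest
 * wrote, advanced by the wall time elapsed since that write at the
 * vcpu's virtual TSC frequency.
 */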

void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	bool vcpus_matched;
	bool do_request = false;
	struct kvm_arch *ka = &vcpu->kvm->arch;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&vcpu->kvm->online_vcpus));

	/*
	 * Request a masterclock update when the conditions change:
	 * enable it once the host clocksource is TSC and all vcpus
	 * have matched TSC writes, and disable it again as soon as
	 * either condition no longer holds.
	 */
	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
		if (!ka->use_master_clock)
			do_request = 1;

	if (!vcpus_matched && ka->use_master_clock)
		do_request = 1;

	if (do_request)
		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
			    atomic_read(&vcpu->kvm->online_vcpus),
			    ka->use_master_clock, gtod->clock.vclock_mode);
#endif
}

static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
	u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}

void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	struct kvm *kvm = vcpu->kvm;
	u64 offset, ns, elapsed;
	unsigned long flags;
	s64 usdiff;
	bool matched;
	u64 data = msr->data;

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
	ns = get_kernel_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;

	if (vcpu->arch.virtual_tsc_khz) {
		int faulted = 0;

		/* n.b - signed multiplication and division required */
		usdiff = data - kvm->arch.last_tsc_write;
#ifdef CONFIG_X86_64
		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
#else
		/* do_div() only does unsigned */
		asm("1: idivl %[divisor]\n"
		    "2: xor %%edx, %%edx\n"
		    "   movl $0, %[faulted]\n"
		    "3:\n"
		    ".section .fixup,\"ax\"\n"
		    "4: movl $1, %[faulted]\n"
		    "   jmp  3b\n"
		    ".previous\n"
		    _ASM_EXTABLE(1b, 4b)
		    : "=A"(usdiff), [faulted] "=r" (faulted)
		    : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));

#endif
		do_div(elapsed, 1000);
		usdiff -= elapsed;
		if (usdiff < 0)
			usdiff = -usdiff;

		/* idivl overflow => difference is larger than USEC_PER_SEC */
		if (faulted)
			usdiff = USEC_PER_SEC;
	} else
		usdiff = USEC_PER_SEC; /* disable TSC match window below */

	/*
	 * Special case: TSC write with a small delta (1 second) of virtual
	 * cycle time against real time is interpreted as an attempt to
	 * synchronize the CPU.
	 *
	 * For a reliable TSC, we can match TSC offsets, and for an unstable
	 * TSC, we add elapsed time in this computation.  We could let the
	 * compensation code attempt to catch up if we fall behind, but
	 * it's better to try to match offsets from the beginning.
	 */
	if (usdiff < USEC_PER_SEC &&
	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
		if (!check_tsc_unstable()) {
			offset = kvm->arch.cur_tsc_offset;
			pr_debug("kvm: matched tsc offset for %llu\n", data);
		} else {
			u64 delta = nsec_to_cycles(vcpu, elapsed);
			data += delta;
			offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
		}
		matched = true;
	} else {
		/*
		 * We split periods of matched TSC writes into generations.
		 * For each generation, we track the original measured
		 * nanosecond time, offset, and write, so if TSCs are in
		 * sync, we can match exact offset, and if not, we can match
		 * exact software computation in compute_guest_tsc()
		 *
		 * These values are tracked in kvm->arch.cur_xxx variables.
		 */
		kvm->arch.cur_tsc_generation++;
		kvm->arch.cur_tsc_nsec = ns;
		kvm->arch.cur_tsc_write = data;
		kvm->arch.cur_tsc_offset = offset;
		matched = false;
		pr_debug("kvm: new tsc generation %u, clock %llu\n",
			 kvm->arch.cur_tsc_generation, data);
	}

	/*
	 * We also track the most recent recorded KHZ, write and time to
	 * allow the matching interval to be extended at each write.
	 */
	kvm->arch.last_tsc_nsec = ns;
	kvm->arch.last_tsc_write = data;
	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;

	/* Reset of TSC must disable overshoot protection below */
	vcpu->arch.hv_clock.tsc_timestamp = 0;
	vcpu->arch.last_guest_tsc = data;

	/* Keep track of which generation this VCPU has synchronized to */
	vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;

	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
		update_ia32_tsc_adjust_msr(vcpu, offset);
	kvm_x86_ops->write_tsc_offset(vcpu, offset);
	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
	if (matched)
		kvm->arch.nr_vcpus_matched_tsc++;
	else
		kvm->arch.nr_vcpus_matched_tsc = 0;

	kvm_track_tsc_matching(vcpu);
	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}

EXPORT_SYMBOL_GPL(kvm_write_tsc);

#ifdef CONFIG_X86_64

static cycle_t read_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)vget_cycles();

	last = pvclock_gtod_data.clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

static inline u64 vgettsc(cycle_t *cycle_now)
{
	long v;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	*cycle_now = read_tsc();

	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
	return v * gtod->clock.mult;
}

static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
{
	unsigned long seq;
	u64 ns;
	int mode;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	ts->tv_nsec = 0;
	do {
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgettsc(cycle_now);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	timespec_add_ns(ts, ns);

	return mode;
}

/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
{
	struct timespec ts;

	/* checked again under seqlock below */
	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
		return false;

	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
		return false;

	monotonic_to_bootbased(&ts);
	*kernel_ns = timespec_to_ns(&ts);

	return true;
}
#endif

/*
 * pvclock master-clock rationale: even with TSCs that are stable
 * across physical CPUs, two vCPUs that publish kvmclock areas from
 * different <host monotonic time, TSC> samples can hand the guest
 * values that jump backwards, because the delta between two monotonic
 * time samples may be smaller than the delta between the
 * corresponding TSC reads.  To preserve monotonicity, never allow
 * distinct system_timestamp/tsc_timestamp pairs to be visible
 * simultaneously: keep one master copy of the tuple, update every
 * vcpu's pvclock area from it in lockstep, and rely on host and guest
 * TSC synchronization for monotonicity.
 */
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	struct kvm_arch *ka = &kvm->arch;
	int vclock_mode;
	bool host_tsc_clocksource, vcpus_matched;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&kvm->online_vcpus));

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	host_tsc_clocksource = kvm_get_time_and_clockread(
					&ka->master_kernel_ns,
					&ka->master_cycle_now);

	ka->use_master_clock = host_tsc_clocksource & vcpus_matched;

	if (ka->use_master_clock)
		atomic_set(&kvm_guest_has_master_clock, 1);

	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
					vcpus_matched);
#endif
}

static int kvm_guest_time_update(struct kvm_vcpu *v)
{
	unsigned long flags, this_tsc_khz;
	struct kvm_vcpu_arch *vcpu = &v->arch;
	struct kvm_arch *ka = &v->kvm->arch;
	s64 kernel_ns, max_kernel_ns;
	u64 tsc_timestamp, host_tsc;
	struct pvclock_vcpu_time_info guest_hv_clock;
	u8 pvclock_flags;
	bool use_master_clock;

	kernel_ns = 0;
	host_tsc = 0;

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	spin_lock(&ka->pvclock_gtod_sync_lock);
	use_master_clock = ka->use_master_clock;
	if (use_master_clock) {
		host_tsc = ka->master_cycle_now;
		kernel_ns = ka->master_kernel_ns;
	}
	spin_unlock(&ka->pvclock_gtod_sync_lock);

	/* Keep irq disabled to prevent changes to the clock */
	local_irq_save(flags);
	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
	if (unlikely(this_tsc_khz == 0)) {
		local_irq_restore(flags);
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
		return 1;
	}
	if (!use_master_clock) {
		host_tsc = native_read_tsc();
		kernel_ns = get_kernel_ns();
	}

	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);

	/*
	 * We may have to catch up the TSC to match elapsed wall clock
	 * time for two reasons, even if kvmclock is used.
	 *   1) CPU could have been running below the maximum TSC rate
	 *   2) Broken TSC compensation resets the base at each VCPU
	 *      entry to avoid unknown leaps of TSC even when running
	 *      on the same CPU.  (see mark_tsc_unstable() in tsc.c)
	 */
	if (vcpu->tsc_catchup) {
		u64 tsc = compute_guest_tsc(v, kernel_ns);
		if (tsc > tsc_timestamp) {
			adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
			tsc_timestamp = tsc;
		}
	}

	local_irq_restore(flags);

	if (!vcpu->pv_time_enabled)
		return 0;

	/*
	 * Time as measured by the TSC may go backwards when resetting the base
	 * tsc_timestamp: the TSC has a far higher resolution than kernel_ns,
	 * so many TSC values map to a single value of the coarser clock.  If
	 * one sampling of TSC against kernel_ns lands in the low end of that
	 * spread and the next in the high end, a clock value the guest has
	 * already observed could be produced again with a smaller kernel_ns.
	 * Guard against that by never publishing a time below what the guest
	 * may already have seen (max_kernel_ns below).
	 */
	max_kernel_ns = 0;
	if (vcpu->hv_clock.tsc_timestamp) {
		max_kernel_ns = vcpu->last_guest_tsc -
				vcpu->hv_clock.tsc_timestamp;
		max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
				    vcpu->hv_clock.tsc_to_system_mul,
				    vcpu->hv_clock.tsc_shift);
		max_kernel_ns += vcpu->last_kernel_ns;
	}

	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
				   &vcpu->hv_clock.tsc_shift,
				   &vcpu->hv_clock.tsc_to_system_mul);
		vcpu->hw_tsc_khz = this_tsc_khz;
	}

	/* with a master <monotonic time, tsc value> tuple,
	 * pvclock clock reads always increase at the (scaled) rate
	 * of guest TSC - no need to deal with sampling errors.
	 */
	if (!use_master_clock) {
		if (max_kernel_ns > kernel_ns)
			kernel_ns = max_kernel_ns;
	}

	/* With all the info we got, fill in the values */
	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
	vcpu->last_kernel_ns = kernel_ns;
	vcpu->last_guest_tsc = tsc_timestamp;

	/*
	 * The interface expects us to write an even number signaling that the
	 * update is finished. Since the guest won't see the intermediate
	 * state, we just increase by 2 at the end.
	 */
	vcpu->hv_clock.version += 2;

	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
		&guest_hv_clock, sizeof(guest_hv_clock))))
		return 0;

	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);

	if (vcpu->pvclock_set_guest_stopped_request) {
		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
		vcpu->pvclock_set_guest_stopped_request = false;
	}

	/* If the host uses TSC clocksource, then it is stable */
	if (use_master_clock)
		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;

	vcpu->hv_clock.flags = pvclock_flags;

	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
				&vcpu->hv_clock,
				sizeof(vcpu->hv_clock));
	return 0;
}
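
/*
 * For reference, the consumer side of the structure written above: a
 * pvclock guest computes the time as (guest-side sketch, simplified
 * from the pvclock ABI rather than taken from this file)
 *
 *	do {
 *		version = hv_clock->version;
 *		rmb();
 *		delta = rdtsc() - hv_clock->tsc_timestamp;
 *		ns = hv_clock->system_time
 *		   + pvclock_scale_delta(delta,
 *					 hv_clock->tsc_to_system_mul,
 *					 hv_clock->tsc_shift);
 *		rmb();
 *	} while ((version & 1) || version != hv_clock->version);
 *
 * which is why tsc_timestamp and system_time must be published
 * together under an odd version, as done above.
 */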

/*
 * kvmclock updates which are isolated to a given vcpu, such as
 * vcpu->cpu migration, should not allow system_timestamp from
 * the rest of the vcpus to remain static.  Otherwise ntp frequency
 * correction would apply to one vcpu's system_timestamp but not
 * the others', so request a kvmclock update for all vcpus.
 */
static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
{
	int i;
	struct kvm *kvm = v->kvm;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
		kvm_vcpu_kick(vcpu);
	}
}

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	case 0x2f8:
		return true;
	}
	return false;
}

static bool valid_pat_type(unsigned t)
{
	return t < 8 && (1 << t) & 0xf3;	/* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73;	/* 0, 1, 4, 5, 6 */
}

static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (i = 0; i < 8; i++)
			if (!valid_pat_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8 ; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	return valid_mtrr_type(data & 0xff);
}

static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

	if (!mtrr_valid(vcpu, msr, data))
		return 1;

	if (msr == MSR_MTRRdefType) {
		vcpu->arch.mtrr_state.def_type = data;
		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
	} else if (msr == MSR_MTRRfix64K_00000)
		p[0] = data;
	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
		p[1 + msr - MSR_MTRRfix16K_80000] = data;
	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else {	/* Variable MTRRs */
		int idx, is_mtrr_mask;
		u64 *pt;

		idx = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * idx;
		if (!is_mtrr_mask)
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
		else
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
		*pt = data;
	}

	kvm_mmu_reset_context(vcpu);
	return 0;
}
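
/*
 * Layout note for the variable-range branch above: the variable MTRRs
 * live at 0x200 + 2*n (PHYSBASEn) and 0x200 + 2*n + 1 (PHYSMASKn).
 * For example, msr 0x203 gives idx = (0x203 - 0x200) / 2 = 1 and
 * is_mtrr_mask = 1, i.e. it selects var_ranges[1].mask_lo.
 */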

static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;

	switch (msr) {
	case MSR_IA32_MCG_STATUS:
		vcpu->arch.mcg_status = data;
		break;
	case MSR_IA32_MCG_CTL:
		if (!(mcg_cap & MCG_CTL_P))
			return 1;
		if (data != 0 && data != ~(u64)0)
			return -1;
		vcpu->arch.mcg_ctl = data;
		break;
	default:
		if (msr >= MSR_IA32_MC0_CTL &&
		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			/* only 0 or all 1s can be written to IA32_MCi_CTL
			 * some Linux kernels though clear bit 10 in bank 4 to
			 * workaround a BIOS/GART TBL issue on AMD K8s, allow
			 * this to avoid an uncatched #GP in the guest
			 */
			if ((offset & 0x3) == 0 &&
			    data != 0 && (data | (1 << 10)) != ~(u64)0)
				return -1;
			vcpu->arch.mce_banks[offset] = data;
			break;
		}
		return 1;
	}
	return 0;
}

static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	int lm = is_long_mode(vcpu);
	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
		: kvm->arch.xen_hvm_config.blob_size_32;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	u8 *page;
	int r;

	r = -E2BIG;
	if (page_num >= blob_size)
		goto out;
	r = -ENOMEM;
	page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
	if (IS_ERR(page)) {
		r = PTR_ERR(page);
		goto out;
	}
	if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
		goto out_free;
	r = 0;
out_free:
	kfree(page);
out:
	return r;
}

static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;
	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
		r = true;
		break;
	}

	return r;
}

static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm *kvm = vcpu->kvm;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		kvm->arch.hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!kvm->arch.hv_guest_os_id)
			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!kvm->arch.hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			kvm->arch.hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		kvm->arch.hv_hypercall = data;
		break;
	}
	default:
		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			    "data 0x%llx\n", msr, data);
		return 1;
	}
	return 0;
}

static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case HV_X64_MSR_APIC_ASSIST_PAGE: {
		unsigned long addr;

		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
			vcpu->arch.hv_vapic = data;
			break;
		}
		addr = gfn_to_hva(vcpu->kvm, data >>
				  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
		if (kvm_is_error_hva(addr))
			return 1;
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		vcpu->arch.hv_vapic = data;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	default:
		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			    "data 0x%llx\n", msr, data);
		return 1;
	}

	return 0;
}

static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
	gpa_t gpa = data & ~0x3f;

	/* Bits 2:5 are reserved, Should be zero */
	if (data & 0x3c)
		return 1;

	vcpu->arch.apf.msr_val = data;

	if (!(data & KVM_ASYNC_PF_ENABLED)) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
		return 0;
	}

	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
					sizeof(u32)))
		return 1;

	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
	kvm_async_pf_wakeup_all(vcpu);
	return 0;
}
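
/*
 * MSR_KVM_ASYNC_PF_EN layout handled above: bit 0 enables the
 * mechanism, bit 1 (KVM_ASYNC_PF_SEND_ALWAYS) asks for events even in
 * kernel mode, bits 2:5 are reserved and must be zero, and the
 * remaining bits give the 64-byte-aligned gpa of the 32-bit token
 * word - hence the "data & ~0x3f" and "data & 0x3c" masks at the top
 * of the function.
 */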

static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pv_time_enabled = false;
}

static void accumulate_steal_time(struct kvm_vcpu *vcpu)
{
	u64 delta;

	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
		return;

	delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
	vcpu->arch.st.last_steal = current->sched_info.run_delay;
	vcpu->arch.st.accum_steal = delta;
}

static void record_steal_time(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
		return;

	if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
		return;

	vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
	vcpu->arch.st.steal.version += 2;
	vcpu->arch.st.accum_steal = 0;

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	bool pr = false;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;

	switch (msr) {
	case MSR_AMD64_NB_CFG:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case MSR_VM_HSAVE_PA:
	case MSR_AMD64_PATCH_LOADER:
	case MSR_AMD64_BU_CFG2:
		break;

	case MSR_EFER:
		return set_efer(vcpu, data);
	case MSR_K7_HWCR:
		data &= ~(u64)0x40;	/* ignore flush filter disable */
		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
		data &= ~(u64)0x8;	/* ignore TLB cache disable */
		if (data != 0) {
			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
				    data);
			return 1;
		}
		break;
	case MSR_FAM10H_MMIO_CONF_BASE:
		if (data != 0) {
			vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
				    "0x%llx\n", data);
			return 1;
		}
		break;
	case MSR_IA32_DEBUGCTLMSR:
		if (!data) {
			/* We support the non-activated case already */
			break;
		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/* Values other than LBR and BTF are vendor-specific,
			   thus reserved and should throw a #GP */
			return 1;
		}
		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
			    __func__, data);
		break;
	case 0x200 ... 0x2ff:
		return set_msr_mtrr(vcpu, msr, data);
	case MSR_IA32_APICBASE:
		kvm_set_apic_base(vcpu, data);
		break;
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_write(vcpu, msr, data);
	case MSR_IA32_TSCDEADLINE:
		kvm_set_lapic_tscdeadline_msr(vcpu, data);
		break;
	case MSR_IA32_TSC_ADJUST:
		if (guest_cpuid_has_tsc_adjust(vcpu)) {
			if (!msr_info->host_initiated) {
				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
			}
			vcpu->arch.ia32_tsc_adjust_msr = data;
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->arch.ia32_misc_enable_msr = data;
		break;
	case MSR_KVM_WALL_CLOCK_NEW:
	case MSR_KVM_WALL_CLOCK:
		vcpu->kvm->arch.wall_clock = data;
		kvm_write_wall_clock(vcpu->kvm, data);
		break;
	case MSR_KVM_SYSTEM_TIME_NEW:
	case MSR_KVM_SYSTEM_TIME: {
		u64 gpa_offset;
		kvmclock_reset(vcpu);

		vcpu->arch.time = data;
		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);

		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;

		gpa_offset = data & ~(PAGE_MASK | 1);

		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
		     &vcpu->arch.pv_time, data & ~1ULL,
		     sizeof(struct pvclock_vcpu_time_info)))
			vcpu->arch.pv_time_enabled = false;
		else
			vcpu->arch.pv_time_enabled = true;

		break;
	}
	case MSR_KVM_ASYNC_PF_EN:
		if (kvm_pv_enable_async_pf(vcpu, data))
			return 1;
		break;
	case MSR_KVM_STEAL_TIME:

		if (unlikely(!sched_info_on()))
			return 1;

		if (data & KVM_STEAL_RESERVED_MASK)
			return 1;

		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
						data & KVM_STEAL_VALID_BITS,
						sizeof(struct kvm_steal_time)))
			return 1;

		vcpu->arch.st.msr_val = data;

		if (!(data & KVM_MSR_ENABLED))
			break;

		vcpu->arch.st.last_steal = current->sched_info.run_delay;

		preempt_disable();
		accumulate_steal_time(vcpu);
		preempt_enable();

		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

		break;
	case MSR_KVM_PV_EOI_EN:
		if (kvm_lapic_enable_pv_eoi(vcpu, data))
			return 1;
		break;

	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
		return set_msr_mce(vcpu, msr, data);

	/* Performance counters are not protected by a CPUID bit,
	 * so we should check all of them in the generic path for the sake of
	 * cross vendor migration.
	 * Writing a zero into the event select MSRs disables them,
	 * which we perfectly emulate ;-). Any other value should be at least
	 * reported, some guests depend on them.
	 */
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		if (data != 0)
			vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
				    "0x%x data 0x%llx\n", msr, data);
		break;
	/* at least RHEL 4 unconditionally writes to the perfctr registers,
	 * so we ignore writes to make it happy.
	 */
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
			    "0x%x data 0x%llx\n", msr, data);
		break;
	case MSR_P6_PERFCTR0:
	case MSR_P6_PERFCTR1:
		pr = true;
		/* fall through */
	case MSR_P6_EVNTSEL0:
	case MSR_P6_EVNTSEL1:
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);

		if (pr || data != 0)
			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
				    "0x%x data 0x%llx\n", msr, data);
		break;
	case MSR_K7_CLK_CTL:
		/*
		 * Ignore all writes to this no longer documented MSR.
		 * Writes are only relevant for old K7 processors,
		 * all pre-dating SVM, but a recommended workaround from
		 * AMD for these chips. It is possible to specify the
		 * affected processor models on the command line, hence
		 * the need to ignore the workaround.
		 */
		break;
	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
		if (kvm_hv_msr_partition_wide(msr)) {
			int r;
			mutex_lock(&vcpu->kvm->lock);
			r = set_msr_hyperv_pw(vcpu, msr, data);
			mutex_unlock(&vcpu->kvm->lock);
			return r;
		} else
			return set_msr_hyperv(vcpu, msr, data);
		break;
	case MSR_IA32_BBL_CR_CTL3:
		/* Drop writes to this legacy MSR -- see rdmsr
		 * counterpart for further detail.
		 */
		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
		break;
	case MSR_AMD64_OSVW_ID_LENGTH:
		if (!guest_cpuid_has_osvw(vcpu))
			return 1;
		vcpu->arch.osvw.length = data;
		break;
	case MSR_AMD64_OSVW_STATUS:
		if (!guest_cpuid_has_osvw(vcpu))
			return 1;
		vcpu->arch.osvw.status = data;
		break;
	default:
		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
			return xen_hvm_config(vcpu, data);
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);
		if (!ignore_msrs) {
			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				    msr, data);
			return 1;
		} else {
			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
				    msr, data);
			break;
		}
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

	if (!msr_mtrr_valid(msr))
		return 1;

	if (msr == MSR_MTRRdefType)
		*pdata = vcpu->arch.mtrr_state.def_type +
			 (vcpu->arch.mtrr_state.enabled << 10);
	else if (msr == MSR_MTRRfix64K_00000)
		*pdata = p[0];
	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
	else if (msr == MSR_IA32_CR_PAT)
		*pdata = vcpu->arch.pat;
	else {	/* Variable MTRRs */
		int idx, is_mtrr_mask;
		u64 *pt;

		idx = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * idx;
		if (!is_mtrr_mask)
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
		else
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
		*pdata = *pt;
	}

	return 0;
}

static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;

	switch (msr) {
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
		data = 0;
		break;
	case MSR_IA32_MCG_CAP:
		data = vcpu->arch.mcg_cap;
		break;
	case MSR_IA32_MCG_CTL:
		if (!(mcg_cap & MCG_CTL_P))
			return 1;
		data = vcpu->arch.mcg_ctl;
		break;
	case MSR_IA32_MCG_STATUS:
		data = vcpu->arch.mcg_status;
		break;
	default:
		if (msr >= MSR_IA32_MC0_CTL &&
		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			data = vcpu->arch.mce_banks[offset];
			break;
		}
		return 1;
	}
	*pdata = data;
	return 0;
}

static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = kvm->arch.hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = kvm->arch.hv_hypercall;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		int r;
		struct kvm_vcpu *v;
		kvm_for_each_vcpu(r, v, vcpu->kvm)
			if (v == vcpu)
				data = r;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_APIC_ASSIST_PAGE:
		data = vcpu->arch.hv_vapic;
		break;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}
2310
2311int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2312{
2313 u64 data;
2314
2315 switch (msr) {
2316 case MSR_IA32_PLATFORM_ID:
2317 case MSR_IA32_EBL_CR_POWERON:
2318 case MSR_IA32_DEBUGCTLMSR:
2319 case MSR_IA32_LASTBRANCHFROMIP:
2320 case MSR_IA32_LASTBRANCHTOIP:
2321 case MSR_IA32_LASTINTFROMIP:
2322 case MSR_IA32_LASTINTTOIP:
2323 case MSR_K8_SYSCFG:
2324 case MSR_K7_HWCR:
2325 case MSR_VM_HSAVE_PA:
2326 case MSR_K7_EVNTSEL0:
2327 case MSR_K7_PERFCTR0:
2328 case MSR_K8_INT_PENDING_MSG:
2329 case MSR_AMD64_NB_CFG:
2330 case MSR_FAM10H_MMIO_CONF_BASE:
2331 case MSR_AMD64_BU_CFG2:
2332 data = 0;
2333 break;
2334 case MSR_P6_PERFCTR0:
2335 case MSR_P6_PERFCTR1:
2336 case MSR_P6_EVNTSEL0:
2337 case MSR_P6_EVNTSEL1:
2338 if (kvm_pmu_msr(vcpu, msr))
2339 return kvm_pmu_get_msr(vcpu, msr, pdata);
2340 data = 0;
2341 break;
2342 case MSR_IA32_UCODE_REV:
2343 data = 0x100000000ULL;
2344 break;
2345 case MSR_MTRRcap:
2346 data = 0x500 | KVM_NR_VAR_MTRR;
2347 break;
2348 case 0x200 ... 0x2ff:
2349 return get_msr_mtrr(vcpu, msr, pdata);
2350 case 0xcd:
2351 data = 3;
2352 break;
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364 case MSR_EBC_FREQUENCY_ID:
2365 data = 1 << 24;
2366 break;
2367 case MSR_IA32_APICBASE:
2368 data = kvm_get_apic_base(vcpu);
2369 break;
2370 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2371 return kvm_x2apic_msr_read(vcpu, msr, pdata);
2373 case MSR_IA32_TSCDEADLINE:
2374 data = kvm_get_lapic_tscdeadline_msr(vcpu);
2375 break;
2376 case MSR_IA32_TSC_ADJUST:
2377 data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2378 break;
2379 case MSR_IA32_MISC_ENABLE:
2380 data = vcpu->arch.ia32_misc_enable_msr;
2381 break;
2382 case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
2384 data = 1000ULL;
		/* CPU multiplier */
2386 data |= (((uint64_t)4ULL) << 40);
2387 break;
2388 case MSR_EFER:
2389 data = vcpu->arch.efer;
2390 break;
2391 case MSR_KVM_WALL_CLOCK:
2392 case MSR_KVM_WALL_CLOCK_NEW:
2393 data = vcpu->kvm->arch.wall_clock;
2394 break;
2395 case MSR_KVM_SYSTEM_TIME:
2396 case MSR_KVM_SYSTEM_TIME_NEW:
2397 data = vcpu->arch.time;
2398 break;
2399 case MSR_KVM_ASYNC_PF_EN:
2400 data = vcpu->arch.apf.msr_val;
2401 break;
2402 case MSR_KVM_STEAL_TIME:
2403 data = vcpu->arch.st.msr_val;
2404 break;
2405 case MSR_KVM_PV_EOI_EN:
2406 data = vcpu->arch.pv_eoi.msr_val;
2407 break;
2408 case MSR_IA32_P5_MC_ADDR:
2409 case MSR_IA32_P5_MC_TYPE:
2410 case MSR_IA32_MCG_CAP:
2411 case MSR_IA32_MCG_CTL:
2412 case MSR_IA32_MCG_STATUS:
2413 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2414 return get_msr_mce(vcpu, msr, pdata);
2415 case MSR_K7_CLK_CTL:
		/*
		 * Provide expected ramp-up count for K7. All other
		 * are set to zero, indicating minimum divisors for
		 * every field.
		 *
		 * This prevents guest kernels on AMD host with CPU
		 * type 6, model 8 and higher from exploding due to
		 * the rdmsr failing.
		 */
2425 data = 0x20000000;
2426 break;
2427 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2428 if (kvm_hv_msr_partition_wide(msr)) {
2429 int r;
2430 mutex_lock(&vcpu->kvm->lock);
2431 r = get_msr_hyperv_pw(vcpu, msr, pdata);
2432 mutex_unlock(&vcpu->kvm->lock);
2433 return r;
2434 } else
2435 return get_msr_hyperv(vcpu, msr, pdata);
2437 case MSR_IA32_BBL_CR_CTL3:
		/* This legacy MSR exists but isn't fully documented in current
		 * silicon.  It is however accessed by winxp in very narrow
		 * scenarios where it sets bit #19, itself documented as
		 * a "reserved" bit.  Best effort attempt to source coherent
		 * read data here should the balance of the register be
		 * interpreted by the guest:
		 *
		 * L2 cache control register 3: 64GB range, 256KB size,
		 * enabled, latency 0x1, configured
		 */
2448 data = 0xbe702111;
2449 break;
2450 case MSR_AMD64_OSVW_ID_LENGTH:
2451 if (!guest_cpuid_has_osvw(vcpu))
2452 return 1;
2453 data = vcpu->arch.osvw.length;
2454 break;
2455 case MSR_AMD64_OSVW_STATUS:
2456 if (!guest_cpuid_has_osvw(vcpu))
2457 return 1;
2458 data = vcpu->arch.osvw.status;
2459 break;
2460 default:
2461 if (kvm_pmu_msr(vcpu, msr))
2462 return kvm_pmu_get_msr(vcpu, msr, pdata);
2463 if (!ignore_msrs) {
2464 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
2465 return 1;
2466 } else {
2467 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
2468 data = 0;
2469 }
2470 break;
2471 }
2472 *pdata = data;
2473 return 0;
2474}
2475EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2476
/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
2482static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2483 struct kvm_msr_entry *entries,
2484 int (*do_msr)(struct kvm_vcpu *vcpu,
2485 unsigned index, u64 *data))
2486{
2487 int i, idx;
2488
2489 idx = srcu_read_lock(&vcpu->kvm->srcu);
2490 for (i = 0; i < msrs->nmsrs; ++i)
2491 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2492 break;
2493 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2494
2495 return i;
2496}
2497
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
2503static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2504 int (*do_msr)(struct kvm_vcpu *vcpu,
2505 unsigned index, u64 *data),
2506 int writeback)
2507{
2508 struct kvm_msrs msrs;
2509 struct kvm_msr_entry *entries;
2510 int r, n;
2511 unsigned size;
2512
2513 r = -EFAULT;
2514 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2515 goto out;
2516
2517 r = -E2BIG;
2518 if (msrs.nmsrs >= MAX_IO_MSRS)
2519 goto out;
2520
2521 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2522 entries = memdup_user(user_msrs->entries, size);
2523 if (IS_ERR(entries)) {
2524 r = PTR_ERR(entries);
2525 goto out;
2526 }
2527
2528 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2529 if (r < 0)
2530 goto out_free;
2531
2532 r = -EFAULT;
2533 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2534 goto out_free;
2535
2536 r = n;
2537
2538out_free:
2539 kfree(entries);
2540out:
2541 return r;
2542}
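
/*
 * Userspace reaches msr_io() via the KVM_GET_MSRS/KVM_SET_MSRS vcpu
 * ioctls.  A minimal caller sketch (hypothetical vcpu_fd, error checks
 * omitted):
 *
 *	struct {
 *		struct kvm_msrs hdr;
 *		struct kvm_msr_entry entries[1];
 *	} buf = { .hdr.nmsrs = 1 };
 *
 *	buf.entries[0].index = 0xc0000080;	// MSR_EFER
 *	ioctl(vcpu_fd, KVM_GET_MSRS, &buf);	// entries[0].data now valid
 */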
2543
2544int kvm_dev_ioctl_check_extension(long ext)
2545{
2546 int r;
2547
2548 switch (ext) {
2549 case KVM_CAP_IRQCHIP:
2550 case KVM_CAP_HLT:
2551 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2552 case KVM_CAP_SET_TSS_ADDR:
2553 case KVM_CAP_EXT_CPUID:
2554 case KVM_CAP_CLOCKSOURCE:
2555 case KVM_CAP_PIT:
2556 case KVM_CAP_NOP_IO_DELAY:
2557 case KVM_CAP_MP_STATE:
2558 case KVM_CAP_SYNC_MMU:
2559 case KVM_CAP_USER_NMI:
2560 case KVM_CAP_REINJECT_CONTROL:
2561 case KVM_CAP_IRQ_INJECT_STATUS:
2562 case KVM_CAP_IRQFD:
2563 case KVM_CAP_IOEVENTFD:
2564 case KVM_CAP_PIT2:
2565 case KVM_CAP_PIT_STATE2:
2566 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2567 case KVM_CAP_XEN_HVM:
2568 case KVM_CAP_ADJUST_CLOCK:
2569 case KVM_CAP_VCPU_EVENTS:
2570 case KVM_CAP_HYPERV:
2571 case KVM_CAP_HYPERV_VAPIC:
2572 case KVM_CAP_HYPERV_SPIN:
2573 case KVM_CAP_PCI_SEGMENT:
2574 case KVM_CAP_DEBUGREGS:
2575 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2576 case KVM_CAP_XSAVE:
2577 case KVM_CAP_ASYNC_PF:
2578 case KVM_CAP_GET_TSC_KHZ:
2579 case KVM_CAP_KVMCLOCK_CTRL:
2580 case KVM_CAP_READONLY_MEM:
2581#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2582 case KVM_CAP_ASSIGN_DEV_IRQ:
2583 case KVM_CAP_PCI_2_3:
2584#endif
2585 r = 1;
2586 break;
2587 case KVM_CAP_COALESCED_MMIO:
2588 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2589 break;
2590 case KVM_CAP_VAPIC:
2591 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2592 break;
2593 case KVM_CAP_NR_VCPUS:
2594 r = KVM_SOFT_MAX_VCPUS;
2595 break;
2596 case KVM_CAP_MAX_VCPUS:
2597 r = KVM_MAX_VCPUS;
2598 break;
2599 case KVM_CAP_NR_MEMSLOTS:
2600 r = KVM_USER_MEM_SLOTS;
2601 break;
2602 case KVM_CAP_PV_MMU:
2603 r = 0;
2604 break;
2605#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2606 case KVM_CAP_IOMMU:
2607 r = iommu_present(&pci_bus_type);
2608 break;
2609#endif
2610 case KVM_CAP_MCE:
2611 r = KVM_MAX_MCE_BANKS;
2612 break;
2613 case KVM_CAP_XCRS:
2614 r = cpu_has_xsave;
2615 break;
2616 case KVM_CAP_TSC_CONTROL:
2617 r = kvm_has_tsc_control;
2618 break;
2619 case KVM_CAP_TSC_DEADLINE_TIMER:
2620 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2621 break;
2622 default:
2623 r = 0;
2624 break;
2625 }
2626 return r;
2628}
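
/*
 * The table above backs the KVM_CHECK_EXTENSION ioctl on /dev/kvm; note
 * that some capabilities report a value rather than a boolean, e.g.
 * ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS) yields the
 * recommended (soft) vcpu limit.
 */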
2629
2630long kvm_arch_dev_ioctl(struct file *filp,
2631 unsigned int ioctl, unsigned long arg)
2632{
2633 void __user *argp = (void __user *)arg;
2634 long r;
2635
2636 switch (ioctl) {
2637 case KVM_GET_MSR_INDEX_LIST: {
2638 struct kvm_msr_list __user *user_msr_list = argp;
2639 struct kvm_msr_list msr_list;
2640 unsigned n;
2641
2642 r = -EFAULT;
2643 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2644 goto out;
2645 n = msr_list.nmsrs;
2646 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2647 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2648 goto out;
2649 r = -E2BIG;
2650 if (n < msr_list.nmsrs)
2651 goto out;
2652 r = -EFAULT;
2653 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2654 num_msrs_to_save * sizeof(u32)))
2655 goto out;
2656 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2657 &emulated_msrs,
2658 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2659 goto out;
2660 r = 0;
2661 break;
2662 }
2663 case KVM_GET_SUPPORTED_CPUID: {
2664 struct kvm_cpuid2 __user *cpuid_arg = argp;
2665 struct kvm_cpuid2 cpuid;
2666
2667 r = -EFAULT;
2668 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2669 goto out;
2670 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
2671 cpuid_arg->entries);
2672 if (r)
2673 goto out;
2674
2675 r = -EFAULT;
2676 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2677 goto out;
2678 r = 0;
2679 break;
2680 }
2681 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2682 u64 mce_cap;
2683
2684 mce_cap = KVM_MCE_CAP_SUPPORTED;
2685 r = -EFAULT;
2686 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2687 goto out;
2688 r = 0;
2689 break;
2690 }
2691 default:
2692 r = -EINVAL;
2693 }
2694out:
2695 return r;
2696}
2697
2698static void wbinvd_ipi(void *garbage)
2699{
2700 wbinvd();
2701}
2702
2703static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2704{
2705 return vcpu->kvm->arch.iommu_domain &&
2706 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2707}
2708
2709void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2710{
	/* Address WBINVD may be executed by guest */
2712 if (need_emulate_wbinvd(vcpu)) {
2713 if (kvm_x86_ops->has_wbinvd_exit())
2714 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2715 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2716 smp_call_function_single(vcpu->cpu,
2717 wbinvd_ipi, NULL, 1);
2718 }
2719
2720 kvm_x86_ops->vcpu_load(vcpu, cpu);
2721
	/* Apply any externally detected TSC adjustments (due to suspend) */
2723 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2724 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2725 vcpu->arch.tsc_offset_adjustment = 0;
2726 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2727 }
2728
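	/*
	 * Recompute the guest's view of time when the vcpu lands on a new
	 * physical cpu or the host TSC is unstable, so the guest never
	 * observes the TSC jumping backwards across the migration.
	 */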
2729 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2730 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2731 native_read_tsc() - vcpu->arch.last_host_tsc;
2732 if (tsc_delta < 0)
2733 mark_tsc_unstable("KVM discovered backwards TSC");
2734 if (check_tsc_unstable()) {
2735 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2736 vcpu->arch.last_guest_tsc);
2737 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2738 vcpu->arch.tsc_catchup = 1;
2739 }
		/*
		 * On a host with synchronized TSC, there is no need
		 * to update kvmclock on vcpu->cpu migration
		 */
2744 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2745 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2746 if (vcpu->cpu != cpu)
2747 kvm_migrate_timers(vcpu);
2748 vcpu->cpu = cpu;
2749 }
2750
2751 accumulate_steal_time(vcpu);
2752 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2753}
2754
2755void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2756{
2757 kvm_x86_ops->vcpu_put(vcpu);
2758 kvm_put_guest_fpu(vcpu);
2759 vcpu->arch.last_host_tsc = native_read_tsc();
2760}
2761
2762static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2763 struct kvm_lapic_state *s)
2764{
2765 kvm_x86_ops->sync_pir_to_irr(vcpu);
2766 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2767
2768 return 0;
2769}
2770
2771static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2772 struct kvm_lapic_state *s)
2773{
2774 kvm_apic_post_state_restore(vcpu, s);
2775 update_cr8_intercept(vcpu);
2776
2777 return 0;
2778}
2779
2780static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2781 struct kvm_interrupt *irq)
2782{
2783 if (irq->irq >= KVM_NR_INTERRUPTS)
2784 return -EINVAL;
2785 if (irqchip_in_kernel(vcpu->kvm))
2786 return -ENXIO;
2787
2788 kvm_queue_interrupt(vcpu, irq->irq, false);
2789 kvm_make_request(KVM_REQ_EVENT, vcpu);
2790
2791 return 0;
2792}
2793
2794static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2795{
2796 kvm_inject_nmi(vcpu);
2797
2798 return 0;
2799}
2800
2801static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2802 struct kvm_tpr_access_ctl *tac)
2803{
2804 if (tac->flags)
2805 return -EINVAL;
2806 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2807 return 0;
2808}
2809
2810static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2811 u64 mcg_cap)
2812{
2813 int r;
2814 unsigned bank_num = mcg_cap & 0xff, bank;
2815
2816 r = -EINVAL;
2817 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2818 goto out;
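	/*
	 * Beyond the supported capability flags, only the bank count
	 * (bits 7:0) and the extended register count (bits 23:16) may
	 * be set by userspace.
	 */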
2819 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2820 goto out;
2821 r = 0;
2822 vcpu->arch.mcg_cap = mcg_cap;
2823
2824 if (mcg_cap & MCG_CTL_P)
2825 vcpu->arch.mcg_ctl = ~(u64)0;
2826
2827 for (bank = 0; bank < bank_num; bank++)
2828 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2829out:
2830 return r;
2831}
2832
2833static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2834 struct kvm_x86_mce *mce)
2835{
2836 u64 mcg_cap = vcpu->arch.mcg_cap;
2837 unsigned bank_num = mcg_cap & 0xff;
2838 u64 *banks = vcpu->arch.mce_banks;
2839
2840 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2841 return -EINVAL;
	/*
	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
	 * reporting is disabled
	 */
2846 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2847 vcpu->arch.mcg_ctl != ~(u64)0)
2848 return 0;
2849 banks += 4 * mce->bank;
	/*
	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
	 * reporting is disabled for the bank
	 */
2854 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2855 return 0;
2856 if (mce->status & MCI_STATUS_UC) {
2857 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2858 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2859 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2860 return 0;
2861 }
2862 if (banks[1] & MCI_STATUS_VAL)
2863 mce->status |= MCI_STATUS_OVER;
2864 banks[2] = mce->addr;
2865 banks[3] = mce->misc;
2866 vcpu->arch.mcg_status = mce->mcg_status;
2867 banks[1] = mce->status;
2868 kvm_queue_exception(vcpu, MC_VECTOR);
2869 } else if (!(banks[1] & MCI_STATUS_VAL)
2870 || !(banks[1] & MCI_STATUS_UC)) {
2871 if (banks[1] & MCI_STATUS_VAL)
2872 mce->status |= MCI_STATUS_OVER;
2873 banks[2] = mce->addr;
2874 banks[3] = mce->misc;
2875 banks[1] = mce->status;
2876 } else
2877 banks[1] |= MCI_STATUS_OVER;
2878 return 0;
2879}
2880
2881static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2882 struct kvm_vcpu_events *events)
2883{
2884 process_nmi(vcpu);
2885 events->exception.injected =
2886 vcpu->arch.exception.pending &&
2887 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2888 events->exception.nr = vcpu->arch.exception.nr;
2889 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2890 events->exception.pad = 0;
2891 events->exception.error_code = vcpu->arch.exception.error_code;
2892
2893 events->interrupt.injected =
2894 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2895 events->interrupt.nr = vcpu->arch.interrupt.nr;
2896 events->interrupt.soft = 0;
2897 events->interrupt.shadow =
2898 kvm_x86_ops->get_interrupt_shadow(vcpu,
2899 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2900
2901 events->nmi.injected = vcpu->arch.nmi_injected;
2902 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2903 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2904 events->nmi.pad = 0;
2905
2906 events->sipi_vector = 0;
2907
2908 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2909 | KVM_VCPUEVENT_VALID_SHADOW);
2910 memset(&events->reserved, 0, sizeof(events->reserved));
2911}
2912
2913static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2914 struct kvm_vcpu_events *events)
2915{
2916 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2917 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2918 | KVM_VCPUEVENT_VALID_SHADOW))
2919 return -EINVAL;
2920
2921 process_nmi(vcpu);
2922 vcpu->arch.exception.pending = events->exception.injected;
2923 vcpu->arch.exception.nr = events->exception.nr;
2924 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2925 vcpu->arch.exception.error_code = events->exception.error_code;
2926
2927 vcpu->arch.interrupt.pending = events->interrupt.injected;
2928 vcpu->arch.interrupt.nr = events->interrupt.nr;
2929 vcpu->arch.interrupt.soft = events->interrupt.soft;
2930 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2931 kvm_x86_ops->set_interrupt_shadow(vcpu,
2932 events->interrupt.shadow);
2933
2934 vcpu->arch.nmi_injected = events->nmi.injected;
2935 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2936 vcpu->arch.nmi_pending = events->nmi.pending;
2937 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2938
2939 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
2940 kvm_vcpu_has_lapic(vcpu))
2941 vcpu->arch.apic->sipi_vector = events->sipi_vector;
2942
2943 kvm_make_request(KVM_REQ_EVENT, vcpu);
2944
2945 return 0;
2946}
2947
2948static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2949 struct kvm_debugregs *dbgregs)
2950{
2951 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2952 dbgregs->dr6 = vcpu->arch.dr6;
2953 dbgregs->dr7 = vcpu->arch.dr7;
2954 dbgregs->flags = 0;
2955 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2956}
2957
2958static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2959 struct kvm_debugregs *dbgregs)
2960{
2961 if (dbgregs->flags)
2962 return -EINVAL;
2963
2964 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2965 vcpu->arch.dr6 = dbgregs->dr6;
2966 vcpu->arch.dr7 = dbgregs->dr7;
2967
2968 return 0;
2969}
2970
2971static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2972 struct kvm_xsave *guest_xsave)
2973{
2974 if (cpu_has_xsave)
2975 memcpy(guest_xsave->region,
2976 &vcpu->arch.guest_fpu.state->xsave,
2977 xstate_size);
2978 else {
2979 memcpy(guest_xsave->region,
2980 &vcpu->arch.guest_fpu.state->fxsave,
2981 sizeof(struct i387_fxsave_struct));
2982 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2983 XSTATE_FPSSE;
2984 }
2985}
2986
2987static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2988 struct kvm_xsave *guest_xsave)
2989{
2990 u64 xstate_bv =
2991 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2992
2993 if (cpu_has_xsave)
2994 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2995 guest_xsave->region, xstate_size);
2996 else {
2997 if (xstate_bv & ~XSTATE_FPSSE)
2998 return -EINVAL;
2999 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
3000 guest_xsave->region, sizeof(struct i387_fxsave_struct));
3001 }
3002 return 0;
3003}
3004
3005static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3006 struct kvm_xcrs *guest_xcrs)
3007{
3008 if (!cpu_has_xsave) {
3009 guest_xcrs->nr_xcrs = 0;
3010 return;
3011 }
3012
3013 guest_xcrs->nr_xcrs = 1;
3014 guest_xcrs->flags = 0;
3015 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3016 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3017}
3018
3019static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3020 struct kvm_xcrs *guest_xcrs)
3021{
3022 int i, r = 0;
3023
3024 if (!cpu_has_xsave)
3025 return -EINVAL;
3026
3027 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3028 return -EINVAL;
3029
3030 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only support XCR0 currently */
		if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
					  guest_xcrs->xcrs[i].value);
3035 break;
3036 }
3037 if (r)
3038 r = -EINVAL;
3039 return r;
3040}
3041
/*
 * kvm_set_guest_paused() indicates to the guest kernel that it has been
 * stopped by the hypervisor.  This function will be called from the host
 * only.  EINVAL is returned when the host attempts to set the paused flag
 * for a vcpu that does not have kvmclock (pv_time) enabled, since there
 * is then no shared pvclock page in which to record the flag.
 */
3048static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3049{
3050 if (!vcpu->arch.pv_time_enabled)
3051 return -EINVAL;
3052 vcpu->arch.pvclock_set_guest_stopped_request = true;
3053 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3054 return 0;
3055}
3056
3057long kvm_arch_vcpu_ioctl(struct file *filp,
3058 unsigned int ioctl, unsigned long arg)
3059{
3060 struct kvm_vcpu *vcpu = filp->private_data;
3061 void __user *argp = (void __user *)arg;
3062 int r;
3063 union {
3064 struct kvm_lapic_state *lapic;
3065 struct kvm_xsave *xsave;
3066 struct kvm_xcrs *xcrs;
3067 void *buffer;
3068 } u;
3069
3070 u.buffer = NULL;
3071 switch (ioctl) {
3072 case KVM_GET_LAPIC: {
3073 r = -EINVAL;
3074 if (!vcpu->arch.apic)
3075 goto out;
3076 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3077
3078 r = -ENOMEM;
3079 if (!u.lapic)
3080 goto out;
3081 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3082 if (r)
3083 goto out;
3084 r = -EFAULT;
3085 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3086 goto out;
3087 r = 0;
3088 break;
3089 }
3090 case KVM_SET_LAPIC: {
3091 r = -EINVAL;
3092 if (!vcpu->arch.apic)
3093 goto out;
3094 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3095 if (IS_ERR(u.lapic))
3096 return PTR_ERR(u.lapic);
3097
3098 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3099 break;
3100 }
3101 case KVM_INTERRUPT: {
3102 struct kvm_interrupt irq;
3103
3104 r = -EFAULT;
3105 if (copy_from_user(&irq, argp, sizeof irq))
3106 goto out;
3107 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3108 break;
3109 }
3110 case KVM_NMI: {
3111 r = kvm_vcpu_ioctl_nmi(vcpu);
3112 break;
3113 }
3114 case KVM_SET_CPUID: {
3115 struct kvm_cpuid __user *cpuid_arg = argp;
3116 struct kvm_cpuid cpuid;
3117
3118 r = -EFAULT;
3119 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3120 goto out;
3121 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3122 break;
3123 }
3124 case KVM_SET_CPUID2: {
3125 struct kvm_cpuid2 __user *cpuid_arg = argp;
3126 struct kvm_cpuid2 cpuid;
3127
3128 r = -EFAULT;
3129 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3130 goto out;
3131 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3132 cpuid_arg->entries);
3133 break;
3134 }
3135 case KVM_GET_CPUID2: {
3136 struct kvm_cpuid2 __user *cpuid_arg = argp;
3137 struct kvm_cpuid2 cpuid;
3138
3139 r = -EFAULT;
3140 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3141 goto out;
3142 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3143 cpuid_arg->entries);
3144 if (r)
3145 goto out;
3146 r = -EFAULT;
3147 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3148 goto out;
3149 r = 0;
3150 break;
3151 }
3152 case KVM_GET_MSRS:
3153 r = msr_io(vcpu, argp, kvm_get_msr, 1);
3154 break;
3155 case KVM_SET_MSRS:
3156 r = msr_io(vcpu, argp, do_set_msr, 0);
3157 break;
3158 case KVM_TPR_ACCESS_REPORTING: {
3159 struct kvm_tpr_access_ctl tac;
3160
3161 r = -EFAULT;
3162 if (copy_from_user(&tac, argp, sizeof tac))
3163 goto out;
3164 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3165 if (r)
3166 goto out;
3167 r = -EFAULT;
3168 if (copy_to_user(argp, &tac, sizeof tac))
3169 goto out;
3170 r = 0;
3171 break;
	}
3173 case KVM_SET_VAPIC_ADDR: {
3174 struct kvm_vapic_addr va;
3175
3176 r = -EINVAL;
3177 if (!irqchip_in_kernel(vcpu->kvm))
3178 goto out;
3179 r = -EFAULT;
3180 if (copy_from_user(&va, argp, sizeof va))
3181 goto out;
3182 r = 0;
3183 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3184 break;
3185 }
3186 case KVM_X86_SETUP_MCE: {
3187 u64 mcg_cap;
3188
3189 r = -EFAULT;
3190 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3191 goto out;
3192 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3193 break;
3194 }
3195 case KVM_X86_SET_MCE: {
3196 struct kvm_x86_mce mce;
3197
3198 r = -EFAULT;
3199 if (copy_from_user(&mce, argp, sizeof mce))
3200 goto out;
3201 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3202 break;
3203 }
3204 case KVM_GET_VCPU_EVENTS: {
3205 struct kvm_vcpu_events events;
3206
3207 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3208
3209 r = -EFAULT;
3210 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3211 break;
3212 r = 0;
3213 break;
3214 }
3215 case KVM_SET_VCPU_EVENTS: {
3216 struct kvm_vcpu_events events;
3217
3218 r = -EFAULT;
3219 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3220 break;
3221
3222 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3223 break;
3224 }
3225 case KVM_GET_DEBUGREGS: {
3226 struct kvm_debugregs dbgregs;
3227
3228 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3229
3230 r = -EFAULT;
3231 if (copy_to_user(argp, &dbgregs,
3232 sizeof(struct kvm_debugregs)))
3233 break;
3234 r = 0;
3235 break;
3236 }
3237 case KVM_SET_DEBUGREGS: {
3238 struct kvm_debugregs dbgregs;
3239
3240 r = -EFAULT;
3241 if (copy_from_user(&dbgregs, argp,
3242 sizeof(struct kvm_debugregs)))
3243 break;
3244
3245 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3246 break;
3247 }
3248 case KVM_GET_XSAVE: {
3249 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3250 r = -ENOMEM;
3251 if (!u.xsave)
3252 break;
3253
3254 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3255
3256 r = -EFAULT;
3257 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3258 break;
3259 r = 0;
3260 break;
3261 }
3262 case KVM_SET_XSAVE: {
3263 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3264 if (IS_ERR(u.xsave))
3265 return PTR_ERR(u.xsave);
3266
3267 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3268 break;
3269 }
3270 case KVM_GET_XCRS: {
3271 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3272 r = -ENOMEM;
3273 if (!u.xcrs)
3274 break;
3275
3276 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3277
3278 r = -EFAULT;
3279 if (copy_to_user(argp, u.xcrs,
3280 sizeof(struct kvm_xcrs)))
3281 break;
3282 r = 0;
3283 break;
3284 }
3285 case KVM_SET_XCRS: {
3286 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3287 if (IS_ERR(u.xcrs))
3288 return PTR_ERR(u.xcrs);
3289
3290 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3291 break;
3292 }
3293 case KVM_SET_TSC_KHZ: {
3294 u32 user_tsc_khz;
3295
3296 r = -EINVAL;
3297 user_tsc_khz = (u32)arg;
3298
3299 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3300 goto out;
3301
3302 if (user_tsc_khz == 0)
3303 user_tsc_khz = tsc_khz;
3304
3305 kvm_set_tsc_khz(vcpu, user_tsc_khz);
3306
3307 r = 0;
3308 goto out;
3309 }
3310 case KVM_GET_TSC_KHZ: {
3311 r = vcpu->arch.virtual_tsc_khz;
3312 goto out;
3313 }
3314 case KVM_KVMCLOCK_CTRL: {
3315 r = kvm_set_guest_paused(vcpu);
3316 goto out;
3317 }
3318 default:
3319 r = -EINVAL;
3320 }
3321out:
3322 kfree(u.buffer);
3323 return r;
3324}
3325
3326int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3327{
3328 return VM_FAULT_SIGBUS;
3329}
3330
3331static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3332{
3333 int ret;
3334
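	/*
	 * The TSS used for real-mode emulation spans three consecutive
	 * pages, so the highest acceptable base leaves room for all of
	 * them below the 4GB boundary.
	 */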
3335 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3336 return -EINVAL;
3337 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3338 return ret;
3339}
3340
3341static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3342 u64 ident_addr)
3343{
3344 kvm->arch.ept_identity_map_addr = ident_addr;
3345 return 0;
3346}
3347
3348static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3349 u32 kvm_nr_mmu_pages)
3350{
3351 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3352 return -EINVAL;
3353
3354 mutex_lock(&kvm->slots_lock);
3355
3356 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3357 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3358
3359 mutex_unlock(&kvm->slots_lock);
3360 return 0;
3361}
3362
3363static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3364{
3365 return kvm->arch.n_max_mmu_pages;
3366}
3367
3368static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3369{
3370 int r;
3371
3372 r = 0;
3373 switch (chip->chip_id) {
3374 case KVM_IRQCHIP_PIC_MASTER:
3375 memcpy(&chip->chip.pic,
3376 &pic_irqchip(kvm)->pics[0],
3377 sizeof(struct kvm_pic_state));
3378 break;
3379 case KVM_IRQCHIP_PIC_SLAVE:
3380 memcpy(&chip->chip.pic,
3381 &pic_irqchip(kvm)->pics[1],
3382 sizeof(struct kvm_pic_state));
3383 break;
3384 case KVM_IRQCHIP_IOAPIC:
3385 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3386 break;
3387 default:
3388 r = -EINVAL;
3389 break;
3390 }
3391 return r;
3392}
3393
3394static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3395{
3396 int r;
3397
3398 r = 0;
3399 switch (chip->chip_id) {
3400 case KVM_IRQCHIP_PIC_MASTER:
3401 spin_lock(&pic_irqchip(kvm)->lock);
3402 memcpy(&pic_irqchip(kvm)->pics[0],
3403 &chip->chip.pic,
3404 sizeof(struct kvm_pic_state));
3405 spin_unlock(&pic_irqchip(kvm)->lock);
3406 break;
3407 case KVM_IRQCHIP_PIC_SLAVE:
3408 spin_lock(&pic_irqchip(kvm)->lock);
3409 memcpy(&pic_irqchip(kvm)->pics[1],
3410 &chip->chip.pic,
3411 sizeof(struct kvm_pic_state));
3412 spin_unlock(&pic_irqchip(kvm)->lock);
3413 break;
3414 case KVM_IRQCHIP_IOAPIC:
3415 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3416 break;
3417 default:
3418 r = -EINVAL;
3419 break;
3420 }
3421 kvm_pic_update_irq(pic_irqchip(kvm));
3422 return r;
3423}
3424
3425static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3426{
3427 int r = 0;
3428
3429 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3430 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3431 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3432 return r;
3433}
3434
3435static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3436{
3437 int r = 0;
3438
3439 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3440 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3441 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3442 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3443 return r;
3444}
3445
3446static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3447{
3448 int r = 0;
3449
3450 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3451 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3452 sizeof(ps->channels));
3453 ps->flags = kvm->arch.vpit->pit_state.flags;
3454 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3455 memset(&ps->reserved, 0, sizeof(ps->reserved));
3456 return r;
3457}
3458
3459static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3460{
3461 int r = 0, start = 0;
3462 u32 prev_legacy, cur_legacy;
3463 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3464 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3465 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3466 if (!prev_legacy && cur_legacy)
3467 start = 1;
3468 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3469 sizeof(kvm->arch.vpit->pit_state.channels));
3470 kvm->arch.vpit->pit_state.flags = ps->flags;
3471 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3472 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3473 return r;
3474}
3475
3476static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3477 struct kvm_reinject_control *control)
3478{
3479 if (!kvm->arch.vpit)
3480 return -ENXIO;
3481 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3482 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3483 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3484 return 0;
3485}
3486
/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * We need to keep it in mind that VCPU threads can write to the bitmap
 * concurrently.  So, to avoid losing data, we keep the following order for
 * each bit:
 *
 *   1. Take a snapshot of the bit and clear it if needed.
 *   2. Write protect the corresponding page.
 *   3. Flush TLB's if needed.
 *   4. Copy the snapshot to the userspace.
 *
 * Between 2 and 3, the guest may write to the page using the remaining TLB
 * entry.  This is not a problem because the page will be reported dirty at
 * step 4 using the snapshot taken before and step 3 ensures that writes
 * done after exiting to userspace will be logged for the next call.
 */
3506int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3507{
3508 int r;
3509 struct kvm_memory_slot *memslot;
3510 unsigned long n, i;
3511 unsigned long *dirty_bitmap;
3512 unsigned long *dirty_bitmap_buffer;
3513 bool is_dirty = false;
3514
3515 mutex_lock(&kvm->slots_lock);
3516
3517 r = -EINVAL;
3518 if (log->slot >= KVM_USER_MEM_SLOTS)
3519 goto out;
3520
3521 memslot = id_to_memslot(kvm->memslots, log->slot);
3522
3523 dirty_bitmap = memslot->dirty_bitmap;
3524 r = -ENOENT;
3525 if (!dirty_bitmap)
3526 goto out;
3527
3528 n = kvm_dirty_bitmap_bytes(memslot);
3529
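	/*
	 * The slot's dirty_bitmap allocation is twice the bitmap size; the
	 * second half serves as a snapshot buffer that is filled under
	 * mmu_lock and then copied out to userspace.
	 */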
3530 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3531 memset(dirty_bitmap_buffer, 0, n);
3532
3533 spin_lock(&kvm->mmu_lock);
3534
3535 for (i = 0; i < n / sizeof(long); i++) {
3536 unsigned long mask;
3537 gfn_t offset;
3538
3539 if (!dirty_bitmap[i])
3540 continue;
3541
3542 is_dirty = true;
3543
3544 mask = xchg(&dirty_bitmap[i], 0);
3545 dirty_bitmap_buffer[i] = mask;
3546
3547 offset = i * BITS_PER_LONG;
3548 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3549 }
3550 if (is_dirty)
3551 kvm_flush_remote_tlbs(kvm);
3552
3553 spin_unlock(&kvm->mmu_lock);
3554
3555 r = -EFAULT;
3556 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3557 goto out;
3558
3559 r = 0;
3560out:
3561 mutex_unlock(&kvm->slots_lock);
3562 return r;
3563}
3564
3565int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3566 bool line_status)
3567{
3568 if (!irqchip_in_kernel(kvm))
3569 return -ENXIO;
3570
3571 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3572 irq_event->irq, irq_event->level,
3573 line_status);
3574 return 0;
3575}
3576
3577long kvm_arch_vm_ioctl(struct file *filp,
3578 unsigned int ioctl, unsigned long arg)
3579{
3580 struct kvm *kvm = filp->private_data;
3581 void __user *argp = (void __user *)arg;
3582 int r = -ENOTTY;
	/*
	 * This union makes it completely explicit to gcc-3.x
	 * that these two variables' stack usage should be
	 * combined, not added together.
	 */
3588 union {
3589 struct kvm_pit_state ps;
3590 struct kvm_pit_state2 ps2;
3591 struct kvm_pit_config pit_config;
3592 } u;
3593
3594 switch (ioctl) {
3595 case KVM_SET_TSS_ADDR:
3596 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3597 break;
3598 case KVM_SET_IDENTITY_MAP_ADDR: {
3599 u64 ident_addr;
3600
3601 r = -EFAULT;
3602 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3603 goto out;
3604 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3605 break;
3606 }
3607 case KVM_SET_NR_MMU_PAGES:
3608 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3609 break;
3610 case KVM_GET_NR_MMU_PAGES:
3611 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3612 break;
3613 case KVM_CREATE_IRQCHIP: {
3614 struct kvm_pic *vpic;
3615
3616 mutex_lock(&kvm->lock);
3617 r = -EEXIST;
3618 if (kvm->arch.vpic)
3619 goto create_irqchip_unlock;
3620 r = -EINVAL;
3621 if (atomic_read(&kvm->online_vcpus))
3622 goto create_irqchip_unlock;
3623 r = -ENOMEM;
3624 vpic = kvm_create_pic(kvm);
3625 if (vpic) {
3626 r = kvm_ioapic_init(kvm);
3627 if (r) {
3628 mutex_lock(&kvm->slots_lock);
3629 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3630 &vpic->dev_master);
3631 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3632 &vpic->dev_slave);
3633 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3634 &vpic->dev_eclr);
3635 mutex_unlock(&kvm->slots_lock);
3636 kfree(vpic);
3637 goto create_irqchip_unlock;
3638 }
3639 } else
3640 goto create_irqchip_unlock;
3641 smp_wmb();
3642 kvm->arch.vpic = vpic;
3643 smp_wmb();
3644 r = kvm_setup_default_irq_routing(kvm);
3645 if (r) {
3646 mutex_lock(&kvm->slots_lock);
3647 mutex_lock(&kvm->irq_lock);
3648 kvm_ioapic_destroy(kvm);
3649 kvm_destroy_pic(kvm);
3650 mutex_unlock(&kvm->irq_lock);
3651 mutex_unlock(&kvm->slots_lock);
3652 }
3653 create_irqchip_unlock:
3654 mutex_unlock(&kvm->lock);
3655 break;
3656 }
3657 case KVM_CREATE_PIT:
3658 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3659 goto create_pit;
3660 case KVM_CREATE_PIT2:
3661 r = -EFAULT;
3662 if (copy_from_user(&u.pit_config, argp,
3663 sizeof(struct kvm_pit_config)))
3664 goto out;
3665 create_pit:
3666 mutex_lock(&kvm->slots_lock);
3667 r = -EEXIST;
3668 if (kvm->arch.vpit)
3669 goto create_pit_unlock;
3670 r = -ENOMEM;
3671 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3672 if (kvm->arch.vpit)
3673 r = 0;
3674 create_pit_unlock:
3675 mutex_unlock(&kvm->slots_lock);
3676 break;
3677 case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3679 struct kvm_irqchip *chip;
3680
3681 chip = memdup_user(argp, sizeof(*chip));
3682 if (IS_ERR(chip)) {
3683 r = PTR_ERR(chip);
3684 goto out;
3685 }
3686
3687 r = -ENXIO;
3688 if (!irqchip_in_kernel(kvm))
3689 goto get_irqchip_out;
3690 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3691 if (r)
3692 goto get_irqchip_out;
3693 r = -EFAULT;
3694 if (copy_to_user(argp, chip, sizeof *chip))
3695 goto get_irqchip_out;
3696 r = 0;
3697 get_irqchip_out:
3698 kfree(chip);
3699 break;
3700 }
3701 case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3703 struct kvm_irqchip *chip;
3704
3705 chip = memdup_user(argp, sizeof(*chip));
3706 if (IS_ERR(chip)) {
3707 r = PTR_ERR(chip);
3708 goto out;
3709 }
3710
3711 r = -ENXIO;
3712 if (!irqchip_in_kernel(kvm))
3713 goto set_irqchip_out;
3714 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3715 if (r)
3716 goto set_irqchip_out;
3717 r = 0;
3718 set_irqchip_out:
3719 kfree(chip);
3720 break;
3721 }
3722 case KVM_GET_PIT: {
3723 r = -EFAULT;
3724 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3725 goto out;
3726 r = -ENXIO;
3727 if (!kvm->arch.vpit)
3728 goto out;
3729 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3730 if (r)
3731 goto out;
3732 r = -EFAULT;
3733 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3734 goto out;
3735 r = 0;
3736 break;
3737 }
3738 case KVM_SET_PIT: {
3739 r = -EFAULT;
3740 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3741 goto out;
3742 r = -ENXIO;
3743 if (!kvm->arch.vpit)
3744 goto out;
3745 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3746 break;
3747 }
3748 case KVM_GET_PIT2: {
3749 r = -ENXIO;
3750 if (!kvm->arch.vpit)
3751 goto out;
3752 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3753 if (r)
3754 goto out;
3755 r = -EFAULT;
3756 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3757 goto out;
3758 r = 0;
3759 break;
3760 }
3761 case KVM_SET_PIT2: {
3762 r = -EFAULT;
3763 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3764 goto out;
3765 r = -ENXIO;
3766 if (!kvm->arch.vpit)
3767 goto out;
3768 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3769 break;
3770 }
3771 case KVM_REINJECT_CONTROL: {
3772 struct kvm_reinject_control control;
3773 r = -EFAULT;
3774 if (copy_from_user(&control, argp, sizeof(control)))
3775 goto out;
3776 r = kvm_vm_ioctl_reinject(kvm, &control);
3777 break;
3778 }
3779 case KVM_XEN_HVM_CONFIG: {
3780 r = -EFAULT;
3781 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3782 sizeof(struct kvm_xen_hvm_config)))
3783 goto out;
3784 r = -EINVAL;
3785 if (kvm->arch.xen_hvm_config.flags)
3786 goto out;
3787 r = 0;
3788 break;
3789 }
3790 case KVM_SET_CLOCK: {
3791 struct kvm_clock_data user_ns;
3792 u64 now_ns;
3793 s64 delta;
3794
3795 r = -EFAULT;
3796 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3797 goto out;
3798
3799 r = -EINVAL;
3800 if (user_ns.flags)
3801 goto out;
3802
3803 r = 0;
3804 local_irq_disable();
3805 now_ns = get_kernel_ns();
3806 delta = user_ns.clock - now_ns;
3807 local_irq_enable();
3808 kvm->arch.kvmclock_offset = delta;
3809 break;
3810 }
3811 case KVM_GET_CLOCK: {
3812 struct kvm_clock_data user_ns;
3813 u64 now_ns;
3814
3815 local_irq_disable();
3816 now_ns = get_kernel_ns();
3817 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3818 local_irq_enable();
3819 user_ns.flags = 0;
3820 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3821
3822 r = -EFAULT;
3823 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3824 goto out;
3825 r = 0;
3826 break;
3827 }
3828
3829 default:
3830 ;
3831 }
3832out:
3833 return r;
3834}
3835
3836static void kvm_init_msr_list(void)
3837{
3838 u32 dummy[2];
3839 unsigned i, j;
3840
	/* skip the first msrs in the list. KVM-specific */
3842 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3843 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3844 continue;
3845 if (j < i)
3846 msrs_to_save[j] = msrs_to_save[i];
3847 j++;
3848 }
3849 num_msrs_to_save = j;
3850}
3851
3852static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3853 const void *v)
3854{
3855 int handled = 0;
3856 int n;
3857
3858 do {
3859 n = min(len, 8);
3860 if (!(vcpu->arch.apic &&
3861 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3862 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3863 break;
3864 handled += n;
3865 addr += n;
3866 len -= n;
3867 v += n;
3868 } while (len);
3869
3870 return handled;
3871}
3872
3873static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3874{
3875 int handled = 0;
3876 int n;
3877
3878 do {
3879 n = min(len, 8);
3880 if (!(vcpu->arch.apic &&
3881 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3882 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3883 break;
3884 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3885 handled += n;
3886 addr += n;
3887 len -= n;
3888 v += n;
3889 } while (len);
3890
3891 return handled;
3892}
3893
3894static void kvm_set_segment(struct kvm_vcpu *vcpu,
3895 struct kvm_segment *var, int seg)
3896{
3897 kvm_x86_ops->set_segment(vcpu, var, seg);
3898}
3899
3900void kvm_get_segment(struct kvm_vcpu *vcpu,
3901 struct kvm_segment *var, int seg)
3902{
3903 kvm_x86_ops->get_segment(vcpu, var, seg);
3904}
3905
3906gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3907{
3908 gpa_t t_gpa;
3909 struct x86_exception exception;
3910
3911 BUG_ON(!mmu_is_nested(vcpu));
3912
	/* NPT walks are always user-walks */
3914 access |= PFERR_USER_MASK;
3915 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3916
3917 return t_gpa;
3918}
3919
3920gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3921 struct x86_exception *exception)
3922{
3923 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3924 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3925}
3926
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3928 struct x86_exception *exception)
3929{
3930 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3931 access |= PFERR_FETCH_MASK;
3932 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3933}
3934
3935gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3936 struct x86_exception *exception)
3937{
3938 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3939 access |= PFERR_WRITE_MASK;
3940 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3941}
3942
/* uses this to access any guest's mapped memory without checking CPL */
3944gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3945 struct x86_exception *exception)
3946{
3947 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3948}
3949
3950static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3951 struct kvm_vcpu *vcpu, u32 access,
3952 struct x86_exception *exception)
3953{
3954 void *data = val;
3955 int r = X86EMUL_CONTINUE;
3956
3957 while (bytes) {
3958 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3959 exception);
3960 unsigned offset = addr & (PAGE_SIZE-1);
3961 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3962 int ret;
3963
3964 if (gpa == UNMAPPED_GVA)
3965 return X86EMUL_PROPAGATE_FAULT;
3966 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3967 if (ret < 0) {
3968 r = X86EMUL_IO_NEEDED;
3969 goto out;
3970 }
3971
3972 bytes -= toread;
3973 data += toread;
3974 addr += toread;
3975 }
3976out:
3977 return r;
3978}
3979
/* used for instruction fetching */
3981static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3982 gva_t addr, void *val, unsigned int bytes,
3983 struct x86_exception *exception)
3984{
3985 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3986 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3987
3988 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3989 access | PFERR_FETCH_MASK,
3990 exception);
3991}
3992
3993int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
3994 gva_t addr, void *val, unsigned int bytes,
3995 struct x86_exception *exception)
3996{
3997 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3998 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3999
4000 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4001 exception);
4002}
4003EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4004
4005static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4006 gva_t addr, void *val, unsigned int bytes,
4007 struct x86_exception *exception)
4008{
4009 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4010 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4011}
4012
4013int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4014 gva_t addr, void *val,
4015 unsigned int bytes,
4016 struct x86_exception *exception)
4017{
4018 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4019 void *data = val;
4020 int r = X86EMUL_CONTINUE;
4021
4022 while (bytes) {
4023 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4024 PFERR_WRITE_MASK,
4025 exception);
4026 unsigned offset = addr & (PAGE_SIZE-1);
4027 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4028 int ret;
4029
4030 if (gpa == UNMAPPED_GVA)
4031 return X86EMUL_PROPAGATE_FAULT;
4032 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
4033 if (ret < 0) {
4034 r = X86EMUL_IO_NEEDED;
4035 goto out;
4036 }
4037
4038 bytes -= towrite;
4039 data += towrite;
4040 addr += towrite;
4041 }
4042out:
4043 return r;
4044}
4045EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4046
4047static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4048 gpa_t *gpa, struct x86_exception *exception,
4049 bool write)
4050{
4051 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
4052 | (write ? PFERR_WRITE_MASK : 0);
4053
4054 if (vcpu_match_mmio_gva(vcpu, gva)
4055 && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
4056 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4057 (gva & (PAGE_SIZE - 1));
4058 trace_vcpu_match_mmio(gva, *gpa, write, false);
4059 return 1;
4060 }
4061
4062 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4063
4064 if (*gpa == UNMAPPED_GVA)
4065 return -1;
4066
	/* For APIC access vmexit */
4068 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4069 return 1;
4070
4071 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
4072 trace_vcpu_match_mmio(gva, *gpa, write, true);
4073 return 1;
4074 }
4075
4076 return 0;
4077}
4078
4079int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4080 const void *val, int bytes)
4081{
4082 int ret;
4083
4084 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
4085 if (ret < 0)
4086 return 0;
4087 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
4088 return 1;
4089}
4090
4091struct read_write_emulator_ops {
4092 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4093 int bytes);
4094 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4095 void *val, int bytes);
4096 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4097 int bytes, void *val);
4098 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4099 void *val, int bytes);
4100 bool write;
4101};
4102
4103static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4104{
4105 if (vcpu->mmio_read_completed) {
4106 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4107 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4108 vcpu->mmio_read_completed = 0;
4109 return 1;
4110 }
4111
4112 return 0;
4113}
4114
4115static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4116 void *val, int bytes)
4117{
4118 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
4119}
4120
4121static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4122 void *val, int bytes)
4123{
4124 return emulator_write_phys(vcpu, gpa, val, bytes);
4125}
4126
4127static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4128{
4129 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4130 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4131}
4132
4133static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4134 void *val, int bytes)
4135{
4136 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4137 return X86EMUL_IO_NEEDED;
4138}
4139
4140static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4141 void *val, int bytes)
4142{
4143 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4144
4145 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4146 return X86EMUL_CONTINUE;
4147}
4148
static const struct read_write_emulator_ops read_emulator = {
4150 .read_write_prepare = read_prepare,
4151 .read_write_emulate = read_emulate,
4152 .read_write_mmio = vcpu_mmio_read,
4153 .read_write_exit_mmio = read_exit_mmio,
4154};
4155
static const struct read_write_emulator_ops write_emulator = {
4157 .read_write_emulate = write_emulate,
4158 .read_write_mmio = write_mmio,
4159 .read_write_exit_mmio = write_exit_mmio,
4160 .write = true,
4161};
4162
4163static int emulator_read_write_onepage(unsigned long addr, void *val,
4164 unsigned int bytes,
4165 struct x86_exception *exception,
4166 struct kvm_vcpu *vcpu,
4167 const struct read_write_emulator_ops *ops)
4168{
4169 gpa_t gpa;
4170 int handled, ret;
4171 bool write = ops->write;
4172 struct kvm_mmio_fragment *frag;
4173
4174 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4175
4176 if (ret < 0)
4177 return X86EMUL_PROPAGATE_FAULT;
4178
	/* For APIC access vmexit */
4180 if (ret)
4181 goto mmio;
4182
4183 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
4184 return X86EMUL_CONTINUE;
4185
4186mmio:
	/*
	 * Is this MMIO handled locally?
	 */
4190 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4191 if (handled == bytes)
4192 return X86EMUL_CONTINUE;
4193
4194 gpa += handled;
4195 bytes -= handled;
4196 val += handled;
4197
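	/*
	 * The in-kernel devices handled only part (possibly none) of the
	 * access; queue the remainder as an mmio fragment to be completed
	 * by userspace via a KVM_EXIT_MMIO round trip.
	 */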
4198 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4199 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4200 frag->gpa = gpa;
4201 frag->data = val;
4202 frag->len = bytes;
4203 return X86EMUL_CONTINUE;
4204}
4205
4206int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
4207 void *val, unsigned int bytes,
4208 struct x86_exception *exception,
4209 const struct read_write_emulator_ops *ops)
4210{
4211 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4212 gpa_t gpa;
4213 int rc;
4214
4215 if (ops->read_write_prepare &&
4216 ops->read_write_prepare(vcpu, val, bytes))
4217 return X86EMUL_CONTINUE;
4218
4219 vcpu->mmio_nr_fragments = 0;
4220
	/* Crossing a page boundary? */
4222 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4223 int now;
4224
4225 now = -addr & ~PAGE_MASK;
4226 rc = emulator_read_write_onepage(addr, val, now, exception,
4227 vcpu, ops);
4228
4229 if (rc != X86EMUL_CONTINUE)
4230 return rc;
4231 addr += now;
4232 val += now;
4233 bytes -= now;
4234 }
4235
4236 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4237 vcpu, ops);
4238 if (rc != X86EMUL_CONTINUE)
4239 return rc;
4240
4241 if (!vcpu->mmio_nr_fragments)
4242 return rc;
4243
4244 gpa = vcpu->mmio_fragments[0].gpa;
4245
4246 vcpu->mmio_needed = 1;
4247 vcpu->mmio_cur_fragment = 0;
4248
4249 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4250 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4251 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4252 vcpu->run->mmio.phys_addr = gpa;
4253
4254 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4255}
4256
4257static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4258 unsigned long addr,
4259 void *val,
4260 unsigned int bytes,
4261 struct x86_exception *exception)
4262{
4263 return emulator_read_write(ctxt, addr, val, bytes,
				   exception, &read_emulator);
4265}
4266
4267int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4268 unsigned long addr,
4269 const void *val,
4270 unsigned int bytes,
4271 struct x86_exception *exception)
4272{
4273 return emulator_read_write(ctxt, addr, (void *)val, bytes,
				   exception, &write_emulator);
4275}
4276
4277#define CMPXCHG_TYPE(t, ptr, old, new) \
4278 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4279
4280#ifdef CONFIG_X86_64
4281# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4282#else
4283# define CMPXCHG64(ptr, old, new) \
4284 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4285#endif
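
/*
 * Both helpers expand to a host cmpxchg on the kmap'ed guest page and
 * report whether the old value matched; 32-bit hosts need an explicit
 * cmpxchg64 (cmpxchg8b) for the 8-byte case.
 */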
4286
4287static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4288 unsigned long addr,
4289 const void *old,
4290 const void *new,
4291 unsigned int bytes,
4292 struct x86_exception *exception)
4293{
4294 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4295 gpa_t gpa;
4296 struct page *page;
4297 char *kaddr;
4298 bool exchanged;
4299
	/* guests cmpxchg8b have to be emulated atomically */
4301 if (bytes > 8 || (bytes & (bytes - 1)))
4302 goto emul_write;
4303
4304 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4305
4306 if (gpa == UNMAPPED_GVA ||
4307 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4308 goto emul_write;
4309
4310 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4311 goto emul_write;
4312
4313 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4314 if (is_error_page(page))
4315 goto emul_write;
4316
4317 kaddr = kmap_atomic(page);
4318 kaddr += offset_in_page(gpa);
4319 switch (bytes) {
4320 case 1:
4321 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4322 break;
4323 case 2:
4324 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4325 break;
4326 case 4:
4327 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4328 break;
4329 case 8:
4330 exchanged = CMPXCHG64(kaddr, old, new);
4331 break;
4332 default:
4333 BUG();
4334 }
4335 kunmap_atomic(kaddr);
4336 kvm_release_page_dirty(page);
4337
4338 if (!exchanged)
4339 return X86EMUL_CMPXCHG_FAILED;
4340
4341 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4342
4343 return X86EMUL_CONTINUE;
4344
4345emul_write:
4346 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4347
4348 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4349}
4350
4351static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4352{
	/* TODO: String I/O for in kernel device */
4354 int r;
4355
4356 if (vcpu->arch.pio.in)
4357 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4358 vcpu->arch.pio.size, pd);
4359 else
4360 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4361 vcpu->arch.pio.port, vcpu->arch.pio.size,
4362 pd);
4363 return r;
4364}
4365
4366static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4367 unsigned short port, void *val,
4368 unsigned int count, bool in)
4369{
4370 trace_kvm_pio(!in, port, size, count);
4371
4372 vcpu->arch.pio.port = port;
4373 vcpu->arch.pio.in = in;
4374 vcpu->arch.pio.count = count;
4375 vcpu->arch.pio.size = size;
4376
4377 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4378 vcpu->arch.pio.count = 0;
4379 return 1;
4380 }
4381
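	/*
	 * No in-kernel device claimed the port; exit to userspace with the
	 * data staged in the shared pio_data page at KVM_PIO_PAGE_OFFSET.
	 */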
4382 vcpu->run->exit_reason = KVM_EXIT_IO;
4383 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4384 vcpu->run->io.size = size;
4385 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4386 vcpu->run->io.count = count;
4387 vcpu->run->io.port = port;
4388
4389 return 0;
4390}
4391
4392static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4393 int size, unsigned short port, void *val,
4394 unsigned int count)
4395{
4396 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4397 int ret;
4398
4399 if (vcpu->arch.pio.count)
4400 goto data_avail;
4401
4402 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4403 if (ret) {
4404data_avail:
4405 memcpy(val, vcpu->arch.pio_data, size * count);
4406 vcpu->arch.pio.count = 0;
4407 return 1;
4408 }
4409
4410 return 0;
4411}
4412
4413static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4414 int size, unsigned short port,
4415 const void *val, unsigned int count)
4416{
4417 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4418
4419 memcpy(vcpu->arch.pio_data, val, size * count);
4420 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4421}
4422
4423static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4424{
4425 return kvm_x86_ops->get_segment_base(vcpu, seg);
4426}
4427
4428static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4429{
4430 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4431}
4432
4433int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4434{
4435 if (!need_emulate_wbinvd(vcpu))
4436 return X86EMUL_CONTINUE;
4437
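	/*
	 * Flush the caches of every physical cpu this vcpu has recently
	 * run on; with an assigned device lacking cache coherency, any of
	 * them may still hold stale lines for the device's DMA.
	 */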
4438 if (kvm_x86_ops->has_wbinvd_exit()) {
4439 int cpu = get_cpu();
4440
4441 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4442 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4443 wbinvd_ipi, NULL, 1);
4444 put_cpu();
4445 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4446 } else
4447 wbinvd();
4448 return X86EMUL_CONTINUE;
4449}
4450EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4451
4452static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4453{
4454 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4455}
4456
4457int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4458{
4459 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4460}
4461
4462int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4463{
4464
4465 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4466}
4467
4468static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4469{
4470 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4471}
4472
4473static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4474{
4475 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4476 unsigned long value;
4477
4478 switch (cr) {
4479 case 0:
4480 value = kvm_read_cr0(vcpu);
4481 break;
4482 case 2:
4483 value = vcpu->arch.cr2;
4484 break;
4485 case 3:
4486 value = kvm_read_cr3(vcpu);
4487 break;
4488 case 4:
4489 value = kvm_read_cr4(vcpu);
4490 break;
4491 case 8:
4492 value = kvm_get_cr8(vcpu);
4493 break;
4494 default:
4495 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4496 return 0;
4497 }
4498
4499 return value;
4500}
4501
4502static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4503{
4504 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4505 int res = 0;
4506
4507 switch (cr) {
4508 case 0:
4509 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4510 break;
4511 case 2:
4512 vcpu->arch.cr2 = val;
4513 break;
4514 case 3:
4515 res = kvm_set_cr3(vcpu, val);
4516 break;
4517 case 4:
4518 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4519 break;
4520 case 8:
4521 res = kvm_set_cr8(vcpu, val);
4522 break;
4523 default:
4524 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4525 res = -1;
4526 }
4527
4528 return res;
4529}
4530
4531static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4532{
4533 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4534}
4535
4536static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4537{
4538 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4539}
4540
4541static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4542{
4543 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4544}
4545
4546static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4547{
4548 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4549}
4550
4551static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4552{
4553 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4554}
4555
4556static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4557{
4558 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4559}
4560
4561static unsigned long emulator_get_cached_segment_base(
4562 struct x86_emulate_ctxt *ctxt, int seg)
4563{
4564 return get_segment_base(emul_to_vcpu(ctxt), seg);
4565}
4566
4567static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4568 struct desc_struct *desc, u32 *base3,
4569 int seg)
4570{
4571 struct kvm_segment var;
4572
4573 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4574 *selector = var.selector;
4575
4576 if (var.unusable) {
4577 memset(desc, 0, sizeof(*desc));
4578 return false;
4579 }
4580
4581 if (var.g)
4582 var.limit >>= 12;
4583 set_desc_limit(desc, var.limit);
4584 set_desc_base(desc, (unsigned long)var.base);
4585#ifdef CONFIG_X86_64
4586 if (base3)
4587 *base3 = var.base >> 32;
4588#endif
4589 desc->type = var.type;
4590 desc->s = var.s;
4591 desc->dpl = var.dpl;
4592 desc->p = var.present;
4593 desc->avl = var.avl;
4594 desc->l = var.l;
4595 desc->d = var.db;
4596 desc->g = var.g;
4597
4598 return true;
4599}
4600
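/*
 * Note the limit-scaling pair: emulator_get_segment() above shifts a
 * page-granular limit right by 12, and the inverse below restores byte
 * granularity with (limit << 12) | 0xfff.  E.g. a flat segment with
 * G=1 and limit 0xfffff spans (0xfffff << 12) | 0xfff = 0xffffffff
 * bytes.
 */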
4601static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4602 struct desc_struct *desc, u32 base3,
4603 int seg)
4604{
4605 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4606 struct kvm_segment var;
4607
4608 var.selector = selector;
4609 var.base = get_desc_base(desc);
4610#ifdef CONFIG_X86_64
4611 var.base |= ((u64)base3) << 32;
4612#endif
4613 var.limit = get_desc_limit(desc);
4614 if (desc->g)
4615 var.limit = (var.limit << 12) | 0xfff;
4616 var.type = desc->type;
4617 var.present = desc->p;
4618 var.dpl = desc->dpl;
4619 var.db = desc->d;
4620 var.s = desc->s;
4621 var.l = desc->l;
4622 var.g = desc->g;
4623 var.avl = desc->avl;
4625 var.unusable = !var.present;
4626 var.padding = 0;
4627
4628 kvm_set_segment(vcpu, &var, seg);
4630}
4631
4632static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4633 u32 msr_index, u64 *pdata)
4634{
4635 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4636}
4637
4638static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4639 u32 msr_index, u64 data)
4640{
4641 struct msr_data msr;
4642
4643 msr.data = data;
4644 msr.index = msr_index;
4645 msr.host_initiated = false;
4646 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4647}
4648
4649static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4650 u32 pmc, u64 *pdata)
4651{
4652 return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
4653}
4654
4655static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4656{
4657 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4658}
4659
4660static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4661{
4662 preempt_disable();
4663 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
4664 /*
4665  * CR0.TS may reference the host fpu state, not the guest fpu state,
4666  * so it may be clear at this point.
4667  */
4668 clts();
4669}
4670
4671static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4672{
4673 preempt_enable();
4674}
4675
4676static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4677 struct x86_instruction_info *info,
4678 enum x86_intercept_stage stage)
4679{
4680 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4681}
4682
4683static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4684 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4685{
4686 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4687}
4688
4689static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4690{
4691 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4692}
4693
4694static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4695{
4696 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4697}
4698
4699static const struct x86_emulate_ops emulate_ops = {
4700 .read_gpr = emulator_read_gpr,
4701 .write_gpr = emulator_write_gpr,
4702 .read_std = kvm_read_guest_virt_system,
4703 .write_std = kvm_write_guest_virt_system,
4704 .fetch = kvm_fetch_guest_virt,
4705 .read_emulated = emulator_read_emulated,
4706 .write_emulated = emulator_write_emulated,
4707 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4708 .invlpg = emulator_invlpg,
4709 .pio_in_emulated = emulator_pio_in_emulated,
4710 .pio_out_emulated = emulator_pio_out_emulated,
4711 .get_segment = emulator_get_segment,
4712 .set_segment = emulator_set_segment,
4713 .get_cached_segment_base = emulator_get_cached_segment_base,
4714 .get_gdt = emulator_get_gdt,
4715 .get_idt = emulator_get_idt,
4716 .set_gdt = emulator_set_gdt,
4717 .set_idt = emulator_set_idt,
4718 .get_cr = emulator_get_cr,
4719 .set_cr = emulator_set_cr,
4720 .set_rflags = emulator_set_rflags,
4721 .cpl = emulator_get_cpl,
4722 .get_dr = emulator_get_dr,
4723 .set_dr = emulator_set_dr,
4724 .set_msr = emulator_set_msr,
4725 .get_msr = emulator_get_msr,
4726 .read_pmc = emulator_read_pmc,
4727 .halt = emulator_halt,
4728 .wbinvd = emulator_wbinvd,
4729 .fix_hypercall = emulator_fix_hypercall,
4730 .get_fpu = emulator_get_fpu,
4731 .put_fpu = emulator_put_fpu,
4732 .intercept = emulator_intercept,
4733 .get_cpuid = emulator_get_cpuid,
4734};
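
/*
 * This table is installed once per vcpu: kvm_arch_vcpu_init() below
 * sets vcpu->arch.emulate_ctxt.ops = &emulate_ops, so every ctxt->ops
 * callback from the instruction emulator lands in the helpers above.
 */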
4735
4736static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4737{
4738 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4739
4740 /*
4741  * An sti; sti sequence only disables interrupts for the first
4742  * instruction, so if the last instruction left the system with the
4743  * STI interrupt shadow set, it was itself an sti and the shadow
4744  * must not be re-armed here.  The same goes for mov ss.
4745  */
4746 if (!(int_shadow & mask))
4747 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4748}
4749
4750static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4751{
4752 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4753 if (ctxt->exception.vector == PF_VECTOR)
4754 kvm_propagate_fault(vcpu, &ctxt->exception);
4755 else if (ctxt->exception.error_code_valid)
4756 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4757 ctxt->exception.error_code);
4758 else
4759 kvm_queue_exception(vcpu, ctxt->exception.vector);
4760}
4761
4762static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4763{
4764 memset(&ctxt->twobyte, 0,
4765 (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
4766
4767 ctxt->fetch.start = 0;
4768 ctxt->fetch.end = 0;
4769 ctxt->io_read.pos = 0;
4770 ctxt->io_read.end = 0;
4771 ctxt->mem_read.pos = 0;
4772 ctxt->mem_read.end = 0;
4773}
4774
4775static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4776{
4777 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4778 int cs_db, cs_l;
4779
4780 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4781
4782 ctxt->eflags = kvm_get_rflags(vcpu);
4783 ctxt->eip = kvm_rip_read(vcpu);
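 /*
  * Derive the emulation mode from current CPU state: CR0.PE clear
  * means real mode, EFLAGS.VM means virtual-8086 mode; otherwise CS.L
  * selects 64-bit mode and CS.D picks 32- vs 16-bit protected mode.
  */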
4784 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4785 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4786 cs_l ? X86EMUL_MODE_PROT64 :
4787 cs_db ? X86EMUL_MODE_PROT32 :
4788 X86EMUL_MODE_PROT16;
4789 ctxt->guest_mode = is_guest_mode(vcpu);
4790
4791 init_decode_cache(ctxt);
4792 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4793}
4794
4795int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4796{
4797 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4798 int ret;
4799
4800 init_emulate_ctxt(vcpu);
4801
4802 ctxt->op_bytes = 2;
4803 ctxt->ad_bytes = 2;
4804 ctxt->_eip = ctxt->eip + inc_eip;
4805 ret = emulate_int_real(ctxt, irq);
4806
4807 if (ret != X86EMUL_CONTINUE)
4808 return EMULATE_FAIL;
4809
4810 ctxt->eip = ctxt->_eip;
4811 kvm_rip_write(vcpu, ctxt->eip);
4812 kvm_set_rflags(vcpu, ctxt->eflags);
4813
4814 if (irq == NMI_VECTOR)
4815 vcpu->arch.nmi_pending = 0;
4816 else
4817 vcpu->arch.interrupt.pending = false;
4818
4819 return EMULATE_DONE;
4820}
4821EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4822
4823static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4824{
4825 int r = EMULATE_DONE;
4826
4827 ++vcpu->stat.insn_emulation_fail;
4828 trace_kvm_emulate_insn_failed(vcpu);
4829 if (!is_guest_mode(vcpu)) {
4830 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4831 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4832 vcpu->run->internal.ndata = 0;
4833 r = EMULATE_FAIL;
4834 }
4835 kvm_queue_exception(vcpu, UD_VECTOR);
4836
4837 return r;
4838}
4839
4840static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4841 bool write_fault_to_shadow_pgtable,
4842 int emulation_type)
4843{
4844 gpa_t gpa = cr2;
4845 pfn_t pfn;
4846
4847 if (emulation_type & EMULTYPE_NO_REEXECUTE)
4848 return false;
4849
4850 if (!vcpu->arch.mmu.direct_map) {
4851 /*
4852  * Write permission should be allowed, since only
4853  * write access needs to be emulated.
4854  */
4855 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4856
4857 /*
4858  * If the mapping is invalid in the guest, let the cpu retry
4859  * it to generate the fault.
4860  */
4861 if (gpa == UNMAPPED_GVA)
4862 return true;
4863 }
4864
4865 /*
4866  * Do not retry the unhandleable instruction if it faults on
4867  * readonly host memory, otherwise it will go into an infinite loop:
4868  * retry instruction -> write #PF -> emulation fail -> retry
4869  * instruction -> ...
4870  */
4871 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4872
4873 /*
4874  * If the instruction failed on the error pfn, it can not be fixed;
4875  * report the error to userspace.
4876  */
4877 if (is_error_noslot_pfn(pfn))
4878 return false;
4879
4880 kvm_release_pfn_clean(pfn);
4881
4882 /* The faulting instructions are well emulated on a direct mmu. */
4883 if (vcpu->arch.mmu.direct_map) {
4884 unsigned int indirect_shadow_pages;
4885
4886 spin_lock(&vcpu->kvm->mmu_lock);
4887 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
4888 spin_unlock(&vcpu->kvm->mmu_lock);
4889
4890 if (indirect_shadow_pages)
4891 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4892
4893 return true;
4894 }
4895
4896 /*
4897  * If emulation was due to an access to a shadowed page table and it
4898  * failed, try to unshadow the page and re-enter the guest to let
4899  * the CPU execute the instruction again.
4900  */
4901 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4902
4903 /*
4904  * If the access faults on its own page table, it can not be fixed
4905  * by unprotecting the shadow page, and it should be reported to
4906  * userspace.
4907  */
4908 return !write_fault_to_shadow_pgtable;
4909}
4910
4911static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4912 unsigned long cr2, int emulation_type)
4913{
4914 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4915 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
4916
4917 last_retry_eip = vcpu->arch.last_retry_eip;
4918 last_retry_addr = vcpu->arch.last_retry_addr;
4919
4920 /*
4921  * If the emulation was caused by a #PF on a non-page-table-writing
4922  * instruction, the VM exit was caused by shadow page protection:
4923  * we can zap the shadow page and retry the instruction directly.
4924  *
4925  * Note: if the guest uses a non-page-table-modifying instruction
4926  * on a PDE that points at the instruction itself, we will unmap
4927  * the page and will not set the dirty bit.
4928  *
4929  * The retried instruction faults again and comes back here if it
4930  * still hits a write-protected page; the last_retry_eip/addr
4931  * bookkeeping below ensures the same fault is retried only once.
4932  */
4933 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
4934
4935 if (!(emulation_type & EMULTYPE_RETRY))
4936 return false;
4937
4938 if (x86_page_table_writing_insn(ctxt))
4939 return false;
4940
4941 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
4942 return false;
4943
4944 vcpu->arch.last_retry_eip = ctxt->eip;
4945 vcpu->arch.last_retry_addr = cr2;
4946
4947 if (!vcpu->arch.mmu.direct_map)
4948 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4949
4950 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4951
4952 return true;
4953}
4954
4955static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
4956static int complete_emulated_pio(struct kvm_vcpu *vcpu);
4957
4958int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4959 unsigned long cr2,
4960 int emulation_type,
4961 void *insn,
4962 int insn_len)
4963{
4964 int r;
4965 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4966 bool writeback = true;
4967 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
4968
4969 /*
4970  * Clear write_fault_to_shadow_pgtable here to ensure it is
4971  * never reused.
4972  */
4973 vcpu->arch.write_fault_to_shadow_pgtable = false;
4974 kvm_clear_exception_queue(vcpu);
4975
4976 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
4977 init_emulate_ctxt(vcpu);
4978 ctxt->interruptibility = 0;
4979 ctxt->have_exception = false;
4980 ctxt->perm_ok = false;
4981
4982 ctxt->only_vendor_specific_insn
4983 = emulation_type & EMULTYPE_TRAP_UD;
4984
4985 r = x86_decode_insn(ctxt, insn, insn_len);
4986
4987 trace_kvm_emulate_insn_start(vcpu);
4988 ++vcpu->stat.insn_emulation;
4989 if (r != EMULATION_OK) {
4990 if (emulation_type & EMULTYPE_TRAP_UD)
4991 return EMULATE_FAIL;
4992 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
4993 emulation_type))
4994 return EMULATE_DONE;
4995 if (emulation_type & EMULTYPE_SKIP)
4996 return EMULATE_FAIL;
4997 return handle_emulation_failure(vcpu);
4998 }
4999 }
5000
5001 if (emulation_type & EMULTYPE_SKIP) {
5002 kvm_rip_write(vcpu, ctxt->_eip);
5003 return EMULATE_DONE;
5004 }
5005
5006 if (retry_instruction(ctxt, cr2, emulation_type))
5007 return EMULATE_DONE;
5008
5009 /* This is needed for the vmware backdoor interface to work, since
5010    it changes register values during the I/O operation. */
5011 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
5012 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5013 emulator_invalidate_register_cache(ctxt);
5014 }
5015
5016restart:
5017 r = x86_emulate_insn(ctxt);
5018
5019 if (r == EMULATION_INTERCEPTED)
5020 return EMULATE_DONE;
5021
5022 if (r == EMULATION_FAILED) {
5023 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5024 emulation_type))
5025 return EMULATE_DONE;
5026
5027 return handle_emulation_failure(vcpu);
5028 }
5029
5030 if (ctxt->have_exception) {
5031 inject_emulated_exception(vcpu);
5032 r = EMULATE_DONE;
5033 } else if (vcpu->arch.pio.count) {
5034 if (!vcpu->arch.pio.in)
5035 vcpu->arch.pio.count = 0;
5036 else {
5037 writeback = false;
5038 vcpu->arch.complete_userspace_io = complete_emulated_pio;
5039 }
5040 r = EMULATE_DO_MMIO;
5041 } else if (vcpu->mmio_needed) {
5042 if (!vcpu->mmio_is_write)
5043 writeback = false;
5044 r = EMULATE_DO_MMIO;
5045 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5046 } else if (r == EMULATION_RESTART)
5047 goto restart;
5048 else
5049 r = EMULATE_DONE;
5050
5051 if (writeback) {
5052 toggle_interruptibility(vcpu, ctxt->interruptibility);
5053 kvm_set_rflags(vcpu, ctxt->eflags);
5054 kvm_make_request(KVM_REQ_EVENT, vcpu);
5055 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5056 kvm_rip_write(vcpu, ctxt->eip);
5057 } else
5058 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5059
5060 return r;
5061}
5062EXPORT_SYMBOL_GPL(x86_emulate_instruction);
5063
5064int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
5065{
5066 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
5067 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
5068 size, port, &val, 1);
5069
5070 vcpu->arch.pio.count = 0;
5071 return ret;
5072}
5073EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
5074
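/*
 * Per-cpu TSC rate bookkeeping.  tsc_bad() marks a CPU's rate as
 * unknown (0); tsc_khz_changed() records the new rate from a cpufreq
 * transition, or re-reads it on non-constant-TSC hosts, falling back
 * to the boot-time tsc_khz.  Both run on the target CPU via
 * smp_call_function_single(); see kvmclock_cpufreq_notifier() below.
 */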
5075static void tsc_bad(void *info)
5076{
5077 __this_cpu_write(cpu_tsc_khz, 0);
5078}
5079
5080static void tsc_khz_changed(void *data)
5081{
5082 struct cpufreq_freqs *freq = data;
5083 unsigned long khz = 0;
5084
5085 if (data)
5086 khz = freq->new;
5087 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5088 khz = cpufreq_quick_get(raw_smp_processor_id());
5089 if (!khz)
5090 khz = tsc_khz;
5091 __this_cpu_write(cpu_tsc_khz, khz);
5092}
5093
5094static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
5095 void *data)
5096{
5097 struct cpufreq_freqs *freq = data;
5098 struct kvm *kvm;
5099 struct kvm_vcpu *vcpu;
5100 int i, send_ipi = 0;
5101
5102 /*
5103  * We allow guests to temporarily run on slowing clocks,
5104  * provided we notify them after, or to run on accelerating
5105  * clocks, provided we notify them before.  Thus time never
5106  * goes backwards.
5107  *
5108  * However, we have a problem.  We can't atomically update
5109  * the frequency of a given CPU from this function; it is
5110  * merely a notifier, which can be called from any CPU.
5111  * Changing the TSC frequency at arbitrary points in time
5112  * requires a recomputation of local variables related to
5113  * the TSC for each VCPU.  We must flag these local variables
5114  * to be updated and be sure the update takes place with the
5115  * new frequency before any guests proceed.
5116  *
5117  * Unfortunately, the combination of hotplug CPU and frequency
5118  * change creates an intractable locking scenario; the order
5119  * of when these callouts happen is undefined with respect to
5120  * CPU hotplug, and they can race with each other.  As such,
5121  * merely setting per_cpu(cpu_tsc_khz) = X during a hotadd is
5122  * undefined; you can actually have a CPU frequency change take
5123  * place in between the computation of X and the setting of the
5124  * variable.  To protect against this problem, all updates of
5125  * the per_cpu tsc_khz variable are done in an interrupt
5126  * protected IPI, and all callers wishing to update the value
5127  * must wait for a synchronous IPI to complete (which is trivial
5128  * if the caller is on the CPU already).  This establishes the
5129  * necessary total order on variable updates.
5130  *
5131  * Note that because a guest time update may take place
5132  * anytime after the setting of the VCPU's request bit, the
5133  * correct TSC value must be set before the request.  The IPI
5134  * sent below after the requests are raised kicks any vcpu
5135  * still running on the changed CPU, so the update is observed
5136  * before the guest continues at the new frequency.
5137  */
5140
5141 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5142 return 0;
5143 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5144 return 0;
5145
5146 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5147
5148 raw_spin_lock(&kvm_lock);
5149 list_for_each_entry(kvm, &vm_list, vm_list) {
5150 kvm_for_each_vcpu(i, vcpu, kvm) {
5151 if (vcpu->cpu != freq->cpu)
5152 continue;
5153 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5154 if (vcpu->cpu != smp_processor_id())
5155 send_ipi = 1;
5156 }
5157 }
5158 raw_spin_unlock(&kvm_lock);
5159
5160 if (freq->old < freq->new && send_ipi) {
5161 /*
5162  * A vcpu may have entered the guest on freq->cpu between the
5163  * first IPI and the request bits being set above.  Sending one
5164  * more synchronous IPI kicks that CPU out of guest mode, so the
5165  * pending KVM_REQ_CLOCK_UPDATE is serviced before the guest can
5166  * run at the new, higher frequency and see time jump forwards.
5167  */
5173 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5174 }
5175 return 0;
5176}
5177
5178static struct notifier_block kvmclock_cpufreq_notifier_block = {
5179 .notifier_call = kvmclock_cpufreq_notifier
5180};
5181
5182static int kvmclock_cpu_notifier(struct notifier_block *nfb,
5183 unsigned long action, void *hcpu)
5184{
5185 unsigned int cpu = (unsigned long)hcpu;
5186
5187 switch (action) {
5188 case CPU_ONLINE:
5189 case CPU_DOWN_FAILED:
5190 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5191 break;
5192 case CPU_DOWN_PREPARE:
5193 smp_call_function_single(cpu, tsc_bad, NULL, 1);
5194 break;
5195 }
5196 return NOTIFY_OK;
5197}
5198
5199static struct notifier_block kvmclock_cpu_notifier_block = {
5200 .notifier_call = kvmclock_cpu_notifier,
5201 .priority = -INT_MAX
5202};
5203
5204static void kvm_timer_init(void)
5205{
5206 int cpu;
5207
5208 max_tsc_khz = tsc_khz;
5209 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5210 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5211#ifdef CONFIG_CPU_FREQ
5212 struct cpufreq_policy policy;
5213 memset(&policy, 0, sizeof(policy));
5214 cpu = get_cpu();
5215 cpufreq_get_policy(&policy, cpu);
5216 if (policy.cpuinfo.max_freq)
5217 max_tsc_khz = policy.cpuinfo.max_freq;
5218 put_cpu();
5219#endif
5220 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5221 CPUFREQ_TRANSITION_NOTIFIER);
5222 }
5223 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5224 for_each_online_cpu(cpu)
5225 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5226}
5227
5228static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5229
5230int kvm_is_in_guest(void)
5231{
5232 return __this_cpu_read(current_vcpu) != NULL;
5233}
5234
5235static int kvm_is_user_mode(void)
5236{
5237 int user_mode = 3;
5238
5239 if (__this_cpu_read(current_vcpu))
5240 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
5241
5242 return user_mode != 0;
5243}
5244
5245static unsigned long kvm_get_guest_ip(void)
5246{
5247 unsigned long ip = 0;
5248
5249 if (__this_cpu_read(current_vcpu))
5250 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
5251
5252 return ip;
5253}
5254
5255static struct perf_guest_info_callbacks kvm_guest_cbs = {
5256 .is_in_guest = kvm_is_in_guest,
5257 .is_user_mode = kvm_is_user_mode,
5258 .get_guest_ip = kvm_get_guest_ip,
5259};
5260
5261void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5262{
5263 __this_cpu_write(current_vcpu, vcpu);
5264}
5265EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5266
5267void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5268{
5269 __this_cpu_write(current_vcpu, NULL);
5270}
5271EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5272
5273static void kvm_set_mmio_spte_mask(void)
5274{
5275 u64 mask;
5276 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5277
5278
5279 /*
5280  * Set the reserved bits and the present bit of a paging-structure
5281  * entry to generate a page fault with PFEC.RSVD = 1.
5282  */
5283 mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
5284
5285 /* Bit 62 is always reserved on 32-bit hosts. */
5286 mask |= 0x3ull << 62;
5287
5288 /* Set the present bit. */
5289 mask |= 1ull;
5290
5291#ifdef CONFIG_X86_64
5292 /*
5293  * If the reserved bit is not supported, clear the present bit to
5294  * disable the mmio page fault.
5295  */
5296 if (maxphyaddr == 52)
5297 mask &= ~1ull;
5298#endif
5299
5300 kvm_mmu_set_mmio_spte_mask(mask);
5301}
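
/*
 * Illustrative example: with boot_cpu_data.x86_phys_bits == 40, the
 * function above sets bits 51:40 (reserved bits), bits 63:62 and bit 0
 * (present), so an MMIO spte faults with PFEC.RSVD set and can be told
 * apart from an ordinary not-present fault.
 */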
5302
5303#ifdef CONFIG_X86_64
5304static void pvclock_gtod_update_fn(struct work_struct *work)
5305{
5306 struct kvm *kvm;
5307
5308 struct kvm_vcpu *vcpu;
5309 int i;
5310
5311 raw_spin_lock(&kvm_lock);
5312 list_for_each_entry(kvm, &vm_list, vm_list)
5313 kvm_for_each_vcpu(i, vcpu, kvm)
5314 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5315 atomic_set(&kvm_guest_has_master_clock, 0);
5316 raw_spin_unlock(&kvm_lock);
5317}
5318
5319static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5320
5321/*
5322 * Notification about pvclock gtod data update.
5323 */
5324static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5325 void *priv)
5326{
5327 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5328 struct timekeeper *tk = priv;
5329
5330 update_pvclock_gtod(tk);
5331
5332 /* disable master clock if host does not trust, or does not
5333  * use, the TSC clocksource
5334  */
5335 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5336 atomic_read(&kvm_guest_has_master_clock) != 0)
5337 queue_work(system_long_wq, &pvclock_gtod_work);
5338
5339 return 0;
5340}
5341
5342static struct notifier_block pvclock_gtod_notifier = {
5343 .notifier_call = pvclock_gtod_notify,
5344};
5345#endif
5346
5347int kvm_arch_init(void *opaque)
5348{
5349 int r;
5350 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
5351
5352 if (kvm_x86_ops) {
5353 printk(KERN_ERR "kvm: already loaded the other module\n");
5354 r = -EEXIST;
5355 goto out;
5356 }
5357
5358 if (!ops->cpu_has_kvm_support()) {
5359 printk(KERN_ERR "kvm: no hardware support\n");
5360 r = -EOPNOTSUPP;
5361 goto out;
5362 }
5363 if (ops->disabled_by_bios()) {
5364 printk(KERN_ERR "kvm: disabled by bios\n");
5365 r = -EOPNOTSUPP;
5366 goto out;
5367 }
5368
5369 r = -ENOMEM;
5370 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5371 if (!shared_msrs) {
5372 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5373 goto out;
5374 }
5375
5376 r = kvm_mmu_module_init();
5377 if (r)
5378 goto out_free_percpu;
5379
5380 kvm_set_mmio_spte_mask();
5381 kvm_init_msr_list();
5382
5383 kvm_x86_ops = ops;
5384 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5385 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5386
5387 kvm_timer_init();
5388
5389 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5390
5391 if (cpu_has_xsave)
5392 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5393
5394 kvm_lapic_init();
5395#ifdef CONFIG_X86_64
5396 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5397#endif
5398
5399 return 0;
5400
5401out_free_percpu:
5402 free_percpu(shared_msrs);
5403out:
5404 return r;
5405}
5406
5407void kvm_arch_exit(void)
5408{
5409 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5410
5411 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5412 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5413 CPUFREQ_TRANSITION_NOTIFIER);
5414 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5415#ifdef CONFIG_X86_64
5416 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5417#endif
5418 kvm_x86_ops = NULL;
5419 kvm_mmu_module_exit();
5420 free_percpu(shared_msrs);
5421}
5422
5423int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5424{
5425 ++vcpu->stat.halt_exits;
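 /*
  * With an in-kernel irqchip the halted vcpu can simply block here
  * until the next interrupt; otherwise userspace models the halt
  * state itself, so report KVM_EXIT_HLT.
  */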
5426 if (irqchip_in_kernel(vcpu->kvm)) {
5427 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5428 return 1;
5429 } else {
5430 vcpu->run->exit_reason = KVM_EXIT_HLT;
5431 return 0;
5432 }
5433}
5434EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5435
5436int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5437{
5438 u64 param, ingpa, outgpa, ret;
5439 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5440 bool fast, longmode;
5441 int cs_db, cs_l;
5442
5443 /*
5444  * A hypercall generates #UD from non-zero CPL and from real
5445  * mode, per the Hyper-V spec.
5446  */
5447 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5448 kvm_queue_exception(vcpu, UD_VECTOR);
5449 return 0;
5450 }
5451
5452 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5453 longmode = is_long_mode(vcpu) && cs_l == 1;
5454
5455 if (!longmode) {
5456 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5457 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5458 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5459 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5460 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5461 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5462 }
5463#ifdef CONFIG_X86_64
5464 else {
5465 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5466 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5467 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5468 }
5469#endif
5470
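 /*
  * Hypercall input layout (per the Hyper-V TLFS): call code in bits
  * 15:0, fast-call flag in bit 16, rep count in bits 43:32, rep start
  * index in bits 59:48.
  */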
5471 code = param & 0xffff;
5472 fast = (param >> 16) & 0x1;
5473 rep_cnt = (param >> 32) & 0xfff;
5474 rep_idx = (param >> 48) & 0xfff;
5475
5476 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5477
5478 switch (code) {
5479 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5480 kvm_vcpu_on_spin(vcpu);
5481 break;
5482 default:
5483 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5484 break;
5485 }
5486
5487 ret = res | (((u64)rep_done & 0xfff) << 32);
5488 if (longmode) {
5489 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5490 } else {
5491 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5492 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5493 }
5494
5495 return 1;
5496}
5497
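/*
 * KVM's native hypercall ABI, as decoded below: number in RAX, up to
 * four arguments in RBX, RCX, RDX and RSI, and the return value placed
 * back in RAX.  Outside long mode all values are truncated to 32 bits.
 */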
5498int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5499{
5500 unsigned long nr, a0, a1, a2, a3, ret;
5501 int r = 1;
5502
5503 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5504 return kvm_hv_hypercall(vcpu);
5505
5506 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5507 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5508 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5509 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5510 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5511
5512 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5513
5514 if (!is_long_mode(vcpu)) {
5515 nr &= 0xFFFFFFFF;
5516 a0 &= 0xFFFFFFFF;
5517 a1 &= 0xFFFFFFFF;
5518 a2 &= 0xFFFFFFFF;
5519 a3 &= 0xFFFFFFFF;
5520 }
5521
5522 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5523 ret = -KVM_EPERM;
5524 goto out;
5525 }
5526
5527 switch (nr) {
5528 case KVM_HC_VAPIC_POLL_IRQ:
5529 ret = 0;
5530 break;
5531 default:
5532 ret = -KVM_ENOSYS;
5533 break;
5534 }
5535out:
5536 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5537 ++vcpu->stat.hypercalls;
5538 return r;
5539}
5540EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5541
5542static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5543{
5544 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5545 char instruction[3];
5546 unsigned long rip = kvm_rip_read(vcpu);
5547
5548 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5549
5550 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5551}
5552
5553/*
5554 * Check if userspace requested an interrupt window, and that the
5555 * interrupt window is open.
5556 *
5557 * No need to exit to userspace if we already have an interrupt queued.
5558 */
5559static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5560{
5561 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5562 vcpu->run->request_interrupt_window &&
5563 kvm_arch_interrupt_allowed(vcpu));
5564}
5565
5566static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5567{
5568 struct kvm_run *kvm_run = vcpu->run;
5569
5570 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5571 kvm_run->cr8 = kvm_get_cr8(vcpu);
5572 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5573 if (irqchip_in_kernel(vcpu->kvm))
5574 kvm_run->ready_for_interrupt_injection = 1;
5575 else
5576 kvm_run->ready_for_interrupt_injection =
5577 kvm_arch_interrupt_allowed(vcpu) &&
5578 !kvm_cpu_has_interrupt(vcpu) &&
5579 !kvm_event_needs_reinjection(vcpu);
5580}
5581
5582static int vapic_enter(struct kvm_vcpu *vcpu)
5583{
5584 struct kvm_lapic *apic = vcpu->arch.apic;
5585 struct page *page;
5586
5587 if (!apic || !apic->vapic_addr)
5588 return 0;
5589
5590 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5591 if (is_error_page(page))
5592 return -EFAULT;
5593
5594 vcpu->arch.apic->vapic_page = page;
5595 return 0;
5596}
5597
5598static void vapic_exit(struct kvm_vcpu *vcpu)
5599{
5600 struct kvm_lapic *apic = vcpu->arch.apic;
5601 int idx;
5602
5603 if (!apic || !apic->vapic_addr)
5604 return;
5605
5606 idx = srcu_read_lock(&vcpu->kvm->srcu);
5607 kvm_release_page_dirty(apic->vapic_page);
5608 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5609 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5610}
5611
5612static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5613{
5614 int max_irr, tpr;
5615
5616 if (!kvm_x86_ops->update_cr8_intercept)
5617 return;
5618
5619 if (!vcpu->arch.apic)
5620 return;
5621
5622 if (!vcpu->arch.apic->vapic_addr)
5623 max_irr = kvm_lapic_find_highest_irr(vcpu);
5624 else
5625 max_irr = -1;
5626
5627 if (max_irr != -1)
5628 max_irr >>= 4;
5629
5630 tpr = kvm_lapic_get_cr8(vcpu);
5631
5632 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5633}
5634
5635static void inject_pending_event(struct kvm_vcpu *vcpu)
5636{
5637 /* try to reinject previous events if any */
5638 if (vcpu->arch.exception.pending) {
5639 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5640 vcpu->arch.exception.has_error_code,
5641 vcpu->arch.exception.error_code);
5642 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5643 vcpu->arch.exception.has_error_code,
5644 vcpu->arch.exception.error_code,
5645 vcpu->arch.exception.reinject);
5646 return;
5647 }
5648
5649 if (vcpu->arch.nmi_injected) {
5650 kvm_x86_ops->set_nmi(vcpu);
5651 return;
5652 }
5653
5654 if (vcpu->arch.interrupt.pending) {
5655 kvm_x86_ops->set_irq(vcpu);
5656 return;
5657 }
5658
5659 /* try to inject a new event if one is pending */
5660 if (vcpu->arch.nmi_pending) {
5661 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5662 --vcpu->arch.nmi_pending;
5663 vcpu->arch.nmi_injected = true;
5664 kvm_x86_ops->set_nmi(vcpu);
5665 }
5666 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5667 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5668 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5669 false);
5670 kvm_x86_ops->set_irq(vcpu);
5671 }
5672 }
5673}
5674
5675static void process_nmi(struct kvm_vcpu *vcpu)
5676{
5677 unsigned limit = 2;
5678
5679 /*
5680  * x86 is limited to one NMI running, and one NMI pending after it.
5681  * If an NMI is already in progress, limit further NMIs to just one.
5682  * Otherwise, allow two (and we'll inject the first one immediately).
5683  */
5684 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
5685 limit = 1;
5686
5687 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
5688 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
5689 kvm_make_request(KVM_REQ_EVENT, vcpu);
5690}
5691
5692static void kvm_gen_update_masterclock(struct kvm *kvm)
5693{
5694#ifdef CONFIG_X86_64
5695 int i;
5696 struct kvm_vcpu *vcpu;
5697 struct kvm_arch *ka = &kvm->arch;
5698
5699 spin_lock(&ka->pvclock_gtod_sync_lock);
5700 kvm_make_mclock_inprogress_request(kvm);
5701
5702 pvclock_update_vm_gtod_copy(kvm);
5703
5704 kvm_for_each_vcpu(i, vcpu, kvm)
5705 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
5706
5707 /* guest entries allowed */
5708 kvm_for_each_vcpu(i, vcpu, kvm)
5709 clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
5710
5711 spin_unlock(&ka->pvclock_gtod_sync_lock);
5712#endif
5713}
5714
5715static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5716{
5717 u64 eoi_exit_bitmap[4];
5718 u32 tmr[8];
5719
5720 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
5721 return;
5722
5723 memset(eoi_exit_bitmap, 0, sizeof(eoi_exit_bitmap));
5724 memset(tmr, 0, sizeof(tmr));
5725
5726 kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
5727 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5728 kvm_apic_update_tmr(vcpu, tmr);
5729}
5730
5731static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5732{
5733 int r;
5734 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5735 vcpu->run->request_interrupt_window;
5736 bool req_immediate_exit = false;
5737
5738 if (vcpu->requests) {
5739 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5740 kvm_mmu_unload(vcpu);
5741 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5742 __kvm_migrate_timers(vcpu);
5743 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
5744 kvm_gen_update_masterclock(vcpu->kvm);
5745 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
5746 kvm_gen_kvmclock_update(vcpu);
5747 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5748 r = kvm_guest_time_update(vcpu);
5749 if (unlikely(r))
5750 goto out;
5751 }
5752 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5753 kvm_mmu_sync_roots(vcpu);
5754 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5755 kvm_x86_ops->tlb_flush(vcpu);
5756 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5757 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5758 r = 0;
5759 goto out;
5760 }
5761 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5762 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5763 r = 0;
5764 goto out;
5765 }
5766 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5767 vcpu->fpu_active = 0;
5768 kvm_x86_ops->fpu_deactivate(vcpu);
5769 }
5770 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
5771 /* Page is swapped out. Do synthetic halt */
5772 vcpu->arch.apf.halted = true;
5773 r = 1;
5774 goto out;
5775 }
5776 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
5777 record_steal_time(vcpu);
5778 if (kvm_check_request(KVM_REQ_NMI, vcpu))
5779 process_nmi(vcpu);
5780 if (kvm_check_request(KVM_REQ_PMU, vcpu))
5781 kvm_handle_pmu_event(vcpu);
5782 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5783 kvm_deliver_pmi(vcpu);
5784 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
5785 vcpu_scan_ioapic(vcpu);
5786 }
5787
5788 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5789 kvm_apic_accept_events(vcpu);
5790 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
5791 r = 1;
5792 goto out;
5793 }
5794
5795 inject_pending_event(vcpu);
5796
5797 /* enable NMI/IRQ window open exits if needed */
5798 if (vcpu->arch.nmi_pending)
5799 req_immediate_exit =
5800 kvm_x86_ops->enable_nmi_window(vcpu) != 0;
5801 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5802 req_immediate_exit =
5803 kvm_x86_ops->enable_irq_window(vcpu) != 0;
5804
5805 if (kvm_lapic_enabled(vcpu)) {
5806 /*
5807  * Update architecture specific hints for APIC
5808  * virtual interrupt delivery.
5809  */
5810 if (kvm_x86_ops->hwapic_irr_update)
5811 kvm_x86_ops->hwapic_irr_update(vcpu,
5812 kvm_lapic_find_highest_irr(vcpu));
5813 update_cr8_intercept(vcpu);
5814 kvm_lapic_sync_to_vapic(vcpu);
5815 }
5816 }
5817
5818 r = kvm_mmu_reload(vcpu);
5819 if (unlikely(r))
5820 goto cancel_injection;
5822
5823 preempt_disable();
5824
5825 kvm_x86_ops->prepare_guest_switch(vcpu);
5826 if (vcpu->fpu_active)
5827 kvm_load_guest_fpu(vcpu);
5828 kvm_load_guest_xcr0(vcpu);
5829
5830 vcpu->mode = IN_GUEST_MODE;
5831
5832 /* We should set ->mode before we check ->requests;
5833  * see the comment in make_all_cpus_request.
5834  */
5835 smp_mb();
5836
5837 local_irq_disable();
5838
5839 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
5840 || need_resched() || signal_pending(current)) {
5841 vcpu->mode = OUTSIDE_GUEST_MODE;
5842 smp_wmb();
5843 local_irq_enable();
5844 preempt_enable();
5845 r = 1;
5846 goto cancel_injection;
5847 }
5848
5849 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5850
5851 if (req_immediate_exit)
5852 smp_send_reschedule(vcpu->cpu);
5853
5854 kvm_guest_enter();
5855
5856 if (unlikely(vcpu->arch.switch_db_regs)) {
5857 set_debugreg(0, 7);
5858 set_debugreg(vcpu->arch.eff_db[0], 0);
5859 set_debugreg(vcpu->arch.eff_db[1], 1);
5860 set_debugreg(vcpu->arch.eff_db[2], 2);
5861 set_debugreg(vcpu->arch.eff_db[3], 3);
5862 }
5863
5864 trace_kvm_entry(vcpu->vcpu_id);
5865 kvm_x86_ops->run(vcpu);
5866
5867 /*
5868  * If the guest has used debug registers, at least dr7
5869  * will be disabled while returning to the host.
5870  * If we don't have active breakpoints in the host, we don't
5871  * care about the messed up debug address registers. But if
5872  * we have some of them active, restore the old state.
5873  */
5874 if (hw_breakpoint_active())
5875 hw_breakpoint_restore();
5876
5877 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
5878 native_read_tsc());
5879
5880 vcpu->mode = OUTSIDE_GUEST_MODE;
5881 smp_wmb();
5882
5883 /* Interrupts are re-enabled by handle_external_intr() */
5884 kvm_x86_ops->handle_external_intr(vcpu);
5885
5886 ++vcpu->stat.exits;
5887
5888 /*
5889  * We must have an instruction between local_irq_enable() and
5890  * kvm_guest_exit(), so the timer interrupt isn't delayed by
5891  * the interrupt shadow.  The stat.exits increment will do nicely.
5892  * But we need to prevent reordering, hence this barrier():
5893  */
5894 barrier();
5895
5896 kvm_guest_exit();
5897
5898 preempt_enable();
5899
5900 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5901
5902 /*
5903  * Profile KVM exit RIPs:
5904  */
5905 if (unlikely(prof_on == KVM_PROFILING)) {
5906 unsigned long rip = kvm_rip_read(vcpu);
5907 profile_hit(KVM_PROFILING, (void *)rip);
5908 }
5909
5910 if (unlikely(vcpu->arch.tsc_always_catchup))
5911 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5912
5913 if (vcpu->arch.apic_attention)
5914 kvm_lapic_sync_from_vapic(vcpu);
5915
5916 r = kvm_x86_ops->handle_exit(vcpu);
5917 return r;
5918
5919cancel_injection:
5920 kvm_x86_ops->cancel_injection(vcpu);
5921 if (unlikely(vcpu->arch.apic_attention))
5922 kvm_lapic_sync_from_vapic(vcpu);
5923out:
5924 return r;
5925}
5926
5927
5928static int __vcpu_run(struct kvm_vcpu *vcpu)
5929{
5930 int r;
5931 struct kvm *kvm = vcpu->kvm;
5932
5933 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5934 r = vapic_enter(vcpu);
5935 if (r) {
5936 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5937 return r;
5938 }
5939
5940 r = 1;
5941 while (r > 0) {
5942 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
5943 !vcpu->arch.apf.halted)
5944 r = vcpu_enter_guest(vcpu);
5945 else {
5946 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5947 kvm_vcpu_block(vcpu);
5948 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5949 if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
5950 kvm_apic_accept_events(vcpu);
5951 switch (vcpu->arch.mp_state) {
5952 case KVM_MP_STATE_HALTED:
5953 vcpu->arch.mp_state =
5954 KVM_MP_STATE_RUNNABLE;
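 /* fall through */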
5955 case KVM_MP_STATE_RUNNABLE:
5956 vcpu->arch.apf.halted = false;
5957 break;
5958 case KVM_MP_STATE_INIT_RECEIVED:
5959 break;
5960 default:
5961 r = -EINTR;
5962 break;
5963 }
5964 }
5965 }
5966
5967 if (r <= 0)
5968 break;
5969
5970 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
5971 if (kvm_cpu_has_pending_timer(vcpu))
5972 kvm_inject_pending_timer_irqs(vcpu);
5973
5974 if (dm_request_for_irq_injection(vcpu)) {
5975 r = -EINTR;
5976 vcpu->run->exit_reason = KVM_EXIT_INTR;
5977 ++vcpu->stat.request_irq_exits;
5978 }
5979
5980 kvm_check_async_pf_completion(vcpu);
5981
5982 if (signal_pending(current)) {
5983 r = -EINTR;
5984 vcpu->run->exit_reason = KVM_EXIT_INTR;
5985 ++vcpu->stat.signal_exits;
5986 }
5987 if (need_resched()) {
5988 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5989 kvm_resched(vcpu);
5990 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5991 }
5992 }
5993
5994 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5995
5996 vapic_exit(vcpu);
5997
5998 return r;
5999}
6000
6001static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
6002{
6003 int r;
6004 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6005 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
6006 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6007 if (r != EMULATE_DONE)
6008 return 0;
6009 return 1;
6010}
6011
6012static int complete_emulated_pio(struct kvm_vcpu *vcpu)
6013{
6014 BUG_ON(!vcpu->arch.pio.count);
6015
6016 return complete_emulated_io(vcpu);
6017}
6018
6019/*
6020 * Implements the following, as a state machine:
6021 *
6022 * read:
6023 *   for each fragment
6024 *     for each mmio piece in the fragment
6025 *       write gpa, len
6026 *       exit
6027 *       copy data
6028 *   execute insn
6029 *
6030 * write:
6031 *   for each fragment
6032 *     for each mmio piece in the fragment
6033 *       write gpa, len
6034 *       copy data
6035 *       exit
6036 */
6037static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6038{
6039 struct kvm_run *run = vcpu->run;
6040 struct kvm_mmio_fragment *frag;
6041 unsigned len;
6042
6043 BUG_ON(!vcpu->mmio_needed);
6044
6045 /* Complete the previous fragment */
6046 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
6047 len = min(8u, frag->len);
6048 if (!vcpu->mmio_is_write)
6049 memcpy(frag->data, run->mmio.data, len);
6050
6051 if (frag->len <= 8) {
6052 /* Switch to the next fragment. */
6053 frag++;
6054 vcpu->mmio_cur_fragment++;
6055 } else {
6056 /* Go forward to the next mmio piece. */
6057 frag->data += len;
6058 frag->gpa += len;
6059 frag->len -= len;
6060 }
6061
6062 if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
6063 vcpu->mmio_needed = 0;
6064 if (vcpu->mmio_is_write)
6065 return 1;
6066 vcpu->mmio_read_completed = 1;
6067 return complete_emulated_io(vcpu);
6068 }
6069
6070 run->exit_reason = KVM_EXIT_MMIO;
6071 run->mmio.phys_addr = frag->gpa;
6072 if (vcpu->mmio_is_write)
6073 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
6074 run->mmio.len = min(8u, frag->len);
6075 run->mmio.is_write = vcpu->mmio_is_write;
6076 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6077 return 0;
6078}
6079
6080
6081int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
6082{
6083 int r;
6084 sigset_t sigsaved;
6085
6086 if (!tsk_used_math(current) && init_fpu(current))
6087 return -ENOMEM;
6088
6089 if (vcpu->sigset_active)
6090 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
6091
6092 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
6093 kvm_vcpu_block(vcpu);
6094 kvm_apic_accept_events(vcpu);
6095 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
6096 r = -EAGAIN;
6097 goto out;
6098 }
6099
6100 /* re-sync the apic's tpr */
6101 if (!irqchip_in_kernel(vcpu->kvm)) {
6102 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
6103 r = -EINVAL;
6104 goto out;
6105 }
6106 }
6107
6108 if (unlikely(vcpu->arch.complete_userspace_io)) {
6109 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
6110 vcpu->arch.complete_userspace_io = NULL;
6111 r = cui(vcpu);
6112 if (r <= 0)
6113 goto out;
6114 } else
6115 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
6116
6117 r = __vcpu_run(vcpu);
6118
6119out:
6120 post_kvm_run_save(vcpu);
6121 if (vcpu->sigset_active)
6122 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
6123
6124 return r;
6125}
6126
6127int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6128{
6129 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
6130 /*
6131  * We are here if userspace calls get_regs() in the middle of
6132  * instruction emulation. Register state needs to be copied back
6133  * from the emulation context to the vcpu. Userspace shouldn't
6134  * usually do that, but some badly designed PV devices (the vmware
6135  * backdoor interface) need this to work.
6136  */
6137 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
6138 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6139 }
6140 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
6141 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
6142 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
6143 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
6144 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
6145 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
6146 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
6147 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
6148#ifdef CONFIG_X86_64
6149 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
6150 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
6151 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
6152 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
6153 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
6154 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
6155 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
6156 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
6157#endif
6158
6159 regs->rip = kvm_rip_read(vcpu);
6160 regs->rflags = kvm_get_rflags(vcpu);
6161
6162 return 0;
6163}
6164
6165int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6166{
6167 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6168 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6169
6170 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
6171 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
6172 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
6173 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
6174 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
6175 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
6176 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
6177 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
6178#ifdef CONFIG_X86_64
6179 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
6180 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
6181 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
6182 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
6183 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
6184 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
6185 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
6186 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
6187#endif
6188
6189 kvm_rip_write(vcpu, regs->rip);
6190 kvm_set_rflags(vcpu, regs->rflags);
6191
6192 vcpu->arch.exception.pending = false;
6193
6194 kvm_make_request(KVM_REQ_EVENT, vcpu);
6195
6196 return 0;
6197}
6198
6199void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
6200{
6201 struct kvm_segment cs;
6202
6203 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6204 *db = cs.db;
6205 *l = cs.l;
6206}
6207EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
6208
6209int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
6210 struct kvm_sregs *sregs)
6211{
6212 struct desc_ptr dt;
6213
6214 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6215 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6216 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6217 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6218 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6219 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6220
6221 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6222 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6223
6224 kvm_x86_ops->get_idt(vcpu, &dt);
6225 sregs->idt.limit = dt.size;
6226 sregs->idt.base = dt.address;
6227 kvm_x86_ops->get_gdt(vcpu, &dt);
6228 sregs->gdt.limit = dt.size;
6229 sregs->gdt.base = dt.address;
6230
6231 sregs->cr0 = kvm_read_cr0(vcpu);
6232 sregs->cr2 = vcpu->arch.cr2;
6233 sregs->cr3 = kvm_read_cr3(vcpu);
6234 sregs->cr4 = kvm_read_cr4(vcpu);
6235 sregs->cr8 = kvm_get_cr8(vcpu);
6236 sregs->efer = vcpu->arch.efer;
6237 sregs->apic_base = kvm_get_apic_base(vcpu);
6238
6239 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
6240
6241 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
6242 set_bit(vcpu->arch.interrupt.nr,
6243 (unsigned long *)sregs->interrupt_bitmap);
6244
6245 return 0;
6246}
6247
6248int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
6249 struct kvm_mp_state *mp_state)
6250{
6251 kvm_apic_accept_events(vcpu);
6252 mp_state->mp_state = vcpu->arch.mp_state;
6253 return 0;
6254}
6255
6256int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
6257 struct kvm_mp_state *mp_state)
6258{
6259 if (!kvm_vcpu_has_lapic(vcpu) &&
6260 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
6261 return -EINVAL;
6262
6263 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
6264 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
6265 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
6266 } else
6267 vcpu->arch.mp_state = mp_state->mp_state;
6268 kvm_make_request(KVM_REQ_EVENT, vcpu);
6269 return 0;
6270}
6271
6272int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
6273 int reason, bool has_error_code, u32 error_code)
6274{
6275 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6276 int ret;
6277
6278 init_emulate_ctxt(vcpu);
6279
6280 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
6281 has_error_code, error_code);
6282
6283 if (ret)
6284 return EMULATE_FAIL;
6285
6286 kvm_rip_write(vcpu, ctxt->eip);
6287 kvm_set_rflags(vcpu, ctxt->eflags);
6288 kvm_make_request(KVM_REQ_EVENT, vcpu);
6289 return EMULATE_DONE;
6290}
6291EXPORT_SYMBOL_GPL(kvm_task_switch);
6292
6293int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6294 struct kvm_sregs *sregs)
6295{
6296 int mmu_reset_needed = 0;
6297 int pending_vec, max_bits, idx;
6298 struct desc_ptr dt;
6299
6300 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
6301 return -EINVAL;
6302
6303 dt.size = sregs->idt.limit;
6304 dt.address = sregs->idt.base;
6305 kvm_x86_ops->set_idt(vcpu, &dt);
6306 dt.size = sregs->gdt.limit;
6307 dt.address = sregs->gdt.base;
6308 kvm_x86_ops->set_gdt(vcpu, &dt);
6309
6310 vcpu->arch.cr2 = sregs->cr2;
6311 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
6312 vcpu->arch.cr3 = sregs->cr3;
6313 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
6314
6315 kvm_set_cr8(vcpu, sregs->cr8);
6316
6317 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6318 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6319 kvm_set_apic_base(vcpu, sregs->apic_base);
6320
6321 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6322 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
6323 vcpu->arch.cr0 = sregs->cr0;
6324
6325 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
6326 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
6327 if (sregs->cr4 & X86_CR4_OSXSAVE)
6328 kvm_update_cpuid(vcpu);
6329
6330 idx = srcu_read_lock(&vcpu->kvm->srcu);
6331 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
6332 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6333 mmu_reset_needed = 1;
6334 }
6335 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6336
6337 if (mmu_reset_needed)
6338 kvm_mmu_reset_context(vcpu);
6339
6340 max_bits = KVM_NR_INTERRUPTS;
6341 pending_vec = find_first_bit(
6342 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
6343 if (pending_vec < max_bits) {
6344 kvm_queue_interrupt(vcpu, pending_vec, false);
6345 pr_debug("Set back pending irq %d\n", pending_vec);
6346 }
6347
6348 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6349 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6350 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6351 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6352 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6353 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6354
6355 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6356 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6357
6358 update_cr8_intercept(vcpu);
6359
6360 /* Older userspace won't unhalt the vcpu on reset. */
6361 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
6362 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
6363 !is_protmode(vcpu))
6364 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6365
6366 kvm_make_request(KVM_REQ_EVENT, vcpu);
6367
6368 return 0;
6369}
6370
6371int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
6372 struct kvm_guest_debug *dbg)
6373{
6374 unsigned long rflags;
6375 int i, r;
6376
6377 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
6378 r = -EBUSY;
6379 if (vcpu->arch.exception.pending)
6380 goto out;
6381 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
6382 kvm_queue_exception(vcpu, DB_VECTOR);
6383 else
6384 kvm_queue_exception(vcpu, BP_VECTOR);
6385 }
6386
6387 /*
6388  * Read rflags as long as potentially injected trace flags are still
6389  * filtered out.
6390  */
6391 rflags = kvm_get_rflags(vcpu);
6392
6393 vcpu->guest_debug = dbg->control;
6394 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
6395 vcpu->guest_debug = 0;
6396
6397 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6398 for (i = 0; i < KVM_NR_DB_REGS; ++i)
6399 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
6400 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
6401 } else {
6402 for (i = 0; i < KVM_NR_DB_REGS; i++)
6403 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6404 }
6405 kvm_update_dr7(vcpu);
6406
6407 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6408 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
6409 get_segment_base(vcpu, VCPU_SREG_CS);
6410
6411 /*
6412  * Trigger an rflags update that will inject or remove the trace
6413  * flags.
6414  */
6415 kvm_set_rflags(vcpu, rflags);
6416
6417 kvm_x86_ops->update_db_bp_intercept(vcpu);
6418
6419 r = 0;
6420
6421out:
6422
6423 return r;
6424}
6425
6426/*
6427 * Translate a guest virtual address to a guest physical address.
6428 */
6429int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6430 struct kvm_translation *tr)
6431{
6432 unsigned long vaddr = tr->linear_address;
6433 gpa_t gpa;
6434 int idx;
6435
6436 idx = srcu_read_lock(&vcpu->kvm->srcu);
6437 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
6438 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6439 tr->physical_address = gpa;
6440 tr->valid = gpa != UNMAPPED_GVA;
6441 tr->writeable = 1;
6442 tr->usermode = 0;
6443
6444 return 0;
6445}
6446
6447int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6448{
6449 struct i387_fxsave_struct *fxsave =
6450 &vcpu->arch.guest_fpu.state->fxsave;
6451
6452 memcpy(fpu->fpr, fxsave->st_space, 128);
6453 fpu->fcw = fxsave->cwd;
6454 fpu->fsw = fxsave->swd;
6455 fpu->ftwx = fxsave->twd;
6456 fpu->last_opcode = fxsave->fop;
6457 fpu->last_ip = fxsave->rip;
6458 fpu->last_dp = fxsave->rdp;
6459 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
6460
6461 return 0;
6462}
6463
6464int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6465{
6466 struct i387_fxsave_struct *fxsave =
6467 &vcpu->arch.guest_fpu.state->fxsave;
6468
6469 memcpy(fxsave->st_space, fpu->fpr, 128);
6470 fxsave->cwd = fpu->fcw;
6471 fxsave->swd = fpu->fsw;
6472 fxsave->twd = fpu->ftwx;
6473 fxsave->fop = fpu->last_opcode;
6474 fxsave->rip = fpu->last_ip;
6475 fxsave->rdp = fpu->last_dp;
6476 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
6477
6478 return 0;
6479}
6480
6481int fx_init(struct kvm_vcpu *vcpu)
6482{
6483 int err;
6484
6485 err = fpu_alloc(&vcpu->arch.guest_fpu);
6486 if (err)
6487 return err;
6488
6489 fpu_finit(&vcpu->arch.guest_fpu);
6490
6491 /*
6492  * Ensure guest xcr0 is valid for loading
6493  */
6494 vcpu->arch.xcr0 = XSTATE_FP;
6495
6496 vcpu->arch.cr0 |= X86_CR0_ET;
6497
6498 return 0;
6499}
6500EXPORT_SYMBOL_GPL(fx_init);
6501
6502static void fx_free(struct kvm_vcpu *vcpu)
6503{
6504 fpu_free(&vcpu->arch.guest_fpu);
6505}
6506
6507void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6508{
6509 if (vcpu->guest_fpu_loaded)
6510 return;
6511
6512 /*
6513  * Restore all possible states in the guest,
6514  * and assume host would use all available bits.
6515  * Guest xcr0 would be loaded later.
6516  */
6517 kvm_put_guest_xcr0(vcpu);
6518 vcpu->guest_fpu_loaded = 1;
6519 __kernel_fpu_begin();
6520 fpu_restore_checking(&vcpu->arch.guest_fpu);
6521 trace_kvm_fpu(1);
6522}
6523
6524void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6525{
6526 kvm_put_guest_xcr0(vcpu);
6527
6528 if (!vcpu->guest_fpu_loaded)
6529 return;
6530
6531 vcpu->guest_fpu_loaded = 0;
6532 fpu_save_init(&vcpu->arch.guest_fpu);
6533 __kernel_fpu_end();
6534 ++vcpu->stat.fpu_reload;
6535 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6536 trace_kvm_fpu(0);
6537}
6538
6539void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6540{
6541 kvmclock_reset(vcpu);
6542
6543 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6544 fx_free(vcpu);
6545 kvm_x86_ops->vcpu_free(vcpu);
6546}
6547
6548struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6549 unsigned int id)
6550{
6551 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6552 printk_once(KERN_WARNING
6553 "kvm: SMP vm created on host with unstable TSC; "
6554 "guest TSC will not be reliable\n");
6555 return kvm_x86_ops->vcpu_create(kvm, id);
6556}
6557
6558int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6559{
6560 int r;
6561
6562 vcpu->arch.mtrr_state.have_fixed = 1;
6563 r = vcpu_load(vcpu);
6564 if (r)
6565 return r;
6566 kvm_vcpu_reset(vcpu);
6567 r = kvm_mmu_setup(vcpu);
6568 vcpu_put(vcpu);
6569
6570 return r;
6571}
6572
6573int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6574{
6575 int r;
6576 struct msr_data msr;
6577
6578 r = vcpu_load(vcpu);
6579 if (r)
6580 return r;
6581 msr.data = 0x0;
6582 msr.index = MSR_IA32_TSC;
6583 msr.host_initiated = true;
6584 kvm_write_tsc(vcpu, &msr);
6585 vcpu_put(vcpu);
6586
6587 return r;
6588}
6589
6590void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6591{
6592 int r;
6593 vcpu->arch.apf.msr_val = 0;
6594
6595 r = vcpu_load(vcpu);
6596 BUG_ON(r);
6597 kvm_mmu_unload(vcpu);
6598 vcpu_put(vcpu);
6599
6600 fx_free(vcpu);
6601 kvm_x86_ops->vcpu_free(vcpu);
6602}
6603
6604void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6605{
6606 atomic_set(&vcpu->arch.nmi_queued, 0);
6607 vcpu->arch.nmi_pending = 0;
6608 vcpu->arch.nmi_injected = false;
6609
6610 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6611 vcpu->arch.dr6 = DR6_FIXED_1;
6612 vcpu->arch.dr7 = DR7_FIXED_1;
6613 kvm_update_dr7(vcpu);
6614
6615 kvm_make_request(KVM_REQ_EVENT, vcpu);
6616 vcpu->arch.apf.msr_val = 0;
6617 vcpu->arch.st.msr_val = 0;
6618
6619 kvmclock_reset(vcpu);
6620
6621 kvm_clear_async_pf_completion_queue(vcpu);
6622 kvm_async_pf_hash_reset(vcpu);
6623 vcpu->arch.apf.halted = false;
6624
6625 kvm_pmu_reset(vcpu);
6626
6627 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
6628 vcpu->arch.regs_avail = ~0;
6629 vcpu->arch.regs_dirty = ~0;
6630
6631 kvm_x86_ops->vcpu_reset(vcpu);
6632}
6633
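/*
 * Illustrative example: a SIPI with vector 0x12 starts the AP in real
 * mode at CS:IP = 0x1200:0x0000, i.e. at physical address 0x12000
 * (vector << 12), matching the segment setup below.
 */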
6634void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
6635{
6636 struct kvm_segment cs;
6637
6638 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6639 cs.selector = vector << 8;
6640 cs.base = vector << 12;
6641 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
6642 kvm_rip_write(vcpu, 0);
6643}
6644
6645int kvm_arch_hardware_enable(void *garbage)
6646{
6647 struct kvm *kvm;
6648 struct kvm_vcpu *vcpu;
6649 int i;
6650 int ret;
6651 u64 local_tsc;
6652 u64 max_tsc = 0;
6653 bool stable, backwards_tsc = false;
6654
6655 kvm_shared_msr_cpu_online();
6656 ret = kvm_x86_ops->hardware_enable(garbage);
6657 if (ret != 0)
6658 return ret;
6659
6660 local_tsc = native_read_tsc();
6661 stable = !check_tsc_unstable();
6662 list_for_each_entry(kvm, &vm_list, vm_list) {
6663 kvm_for_each_vcpu(i, vcpu, kvm) {
6664 if (!stable && vcpu->cpu == smp_processor_id())
6665 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6666 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6667 backwards_tsc = true;
6668 if (vcpu->arch.last_host_tsc > max_tsc)
6669 max_tsc = vcpu->arch.last_host_tsc;
6670 }
6671 }
6672 }
6673
6674 /*
6675  * Sometimes, even reliable TSCs go backwards.  This happens on
6676  * platforms that reset TSC during suspend or hibernate actions, but
6677  * maintain synchronization.  We must compensate.  Fortunately, we can
6678  * detect that condition here, which happens early in CPU bringup,
6679  * before any KVM threads can be running.  Unfortunately, we can't
6680  * bring the TSCs fully up to date with real time, as we aren't yet far
6681  * enough into CPU bringup that we know how much real time has actually
6682  * elapsed; our helper function, get_kernel_ns() will be using boot
6683  * variables that haven't been updated yet.
6684  *
6685  * So we simply find the maximum observed TSC above, then record the
6686  * adjustment to TSC in each VCPU.  When the VCPU later gets loaded,
6687  * the adjustment will be applied.  Note that we accumulate
6688  * adjustments, in case multiple suspend cycles happen before some VCPU
6689  * gets a chance to run again.  In the event that no KVM threads get a
6690  * chance to run, we will miss the entire elapsed period, as we'll have
6691  * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
6692  * lose cycle time.  This isn't too big a deal, since the loss will be
6693  * uniform across all VCPUs (not to mention the scenario is extremely
6694  * unlikely).  It is possible that a second hibernate recovery happens
6695  * much faster than a first, causing the observed TSC here to be
6696  * smaller; this would require additional padding adjustment, which is
6697  * why we set last_host_tsc to the local tsc observed here.
6698  *
6699  * N.B. - this code below runs only on platforms with reliable TSC,
6700  * as that is the only way backwards_tsc is set above.  Also note that
6701  * this runs for ALL vcpus, which is not a bug; all VCPUs should have
6702  * the same delta_cyc adjustment applied if backwards_tsc is detected.
6703  * Note further that the per-VM TSC-matching bookkeeping is reset once
6704  * per VM, outside the VCPU loop below.
6705  */
6706
6712 if (backwards_tsc) {
6713 u64 delta_cyc = max_tsc - local_tsc;
6714 list_for_each_entry(kvm, &vm_list, vm_list) {
6715 kvm_for_each_vcpu(i, vcpu, kvm) {
6716 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6717 vcpu->arch.last_host_tsc = local_tsc;
6718 set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
6719 &vcpu->requests);
6720 }
6721
6722 /*
6723  * We have to disable TSC offset matching.. if you were
6724  * booting a VM while issuing an S4 host suspend....
6725  * you may have some problem.  Solving this issue is
6726  * left as an exercise to the reader.
6727  */
6728 kvm->arch.last_tsc_nsec = 0;
6729 kvm->arch.last_tsc_write = 0;
6730 }
6731
6732 }
6733 return 0;
6734}
6735
6736void kvm_arch_hardware_disable(void *garbage)
6737{
6738 kvm_x86_ops->hardware_disable(garbage);
6739 drop_user_return_notifiers(garbage);
6740}
6741
6742int kvm_arch_hardware_setup(void)
6743{
6744 return kvm_x86_ops->hardware_setup();
6745}
6746
6747void kvm_arch_hardware_unsetup(void)
6748{
6749 kvm_x86_ops->hardware_unsetup();
6750}
6751
6752void kvm_arch_check_processor_compat(void *rtn)
6753{
6754 kvm_x86_ops->check_processor_compatibility(rtn);
6755}
6756
6757bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6758{
6759 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6760}
6761
struct static_key kvm_no_apic_vcpu __read_mostly;

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct page *page;
	struct kvm *kvm;
	int r;

	BUG_ON(vcpu->kvm == NULL);
	kvm = vcpu->kvm;

	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	else
		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->arch.pio_data = page_address(page);

	kvm_set_tsc_khz(vcpu, max_tsc_khz);

	r = kvm_mmu_create(vcpu);
	if (r < 0)
		goto fail_free_pio_data;

	if (irqchip_in_kernel(kvm)) {
		r = kvm_create_lapic(vcpu);
		if (r < 0)
			goto fail_mmu_destroy;
	} else
		static_key_slow_inc(&kvm_no_apic_vcpu);

	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
				       GFP_KERNEL);
	if (!vcpu->arch.mce_banks) {
		r = -ENOMEM;
		goto fail_free_lapic;
	}
	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
		r = -ENOMEM;
		goto fail_free_mce_banks;
	}

	r = fx_init(vcpu);
	if (r)
		goto fail_free_wbinvd_dirty_mask;

	vcpu->arch.ia32_tsc_adjust_msr = 0x0;
	vcpu->arch.pv_time_enabled = false;
	kvm_async_pf_hash_reset(vcpu);
	kvm_pmu_init(vcpu);

	return 0;
fail_free_wbinvd_dirty_mask:
	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks:
	kfree(vcpu->arch.mce_banks);
fail_free_lapic:
	kvm_free_lapic(vcpu);
fail_mmu_destroy:
	kvm_mmu_destroy(vcpu);
fail_free_pio_data:
	free_page((unsigned long)vcpu->arch.pio_data);
fail:
	return r;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	int idx;

	kvm_pmu_destroy(vcpu);
	kfree(vcpu->arch.mce_banks);
	kvm_free_lapic(vcpu);
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	kvm_mmu_destroy(vcpu);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	free_page((unsigned long)vcpu->arch.pio_data);
	if (!irqchip_in_kernel(vcpu->kvm))
		static_key_slow_dec(&kvm_no_apic_vcpu);
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	if (type)
		return -EINVAL;

	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
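	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */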
	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
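	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */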
	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		&kvm->arch.irq_sources_bitmap);

	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
	mutex_init(&kvm->arch.apic_map_lock);
	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);

	pvclock_update_vm_gtod_copy(kvm);

	return 0;
}

static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
	int r;
	r = vcpu_load(vcpu);
	BUG_ON(r);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

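	/* Unpin any mmu pages first. */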
	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_unload_vcpu_mmu(vcpu);
	}
	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_free(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
	kvm_free_all_assigned_devices(kvm);
	kvm_free_pit(kvm);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	if (current->mm == kvm->mm) {
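		/*
		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
		 * or fd copying.
		 */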
		struct kvm_userspace_memory_region mem;
		memset(&mem, 0, sizeof(mem));
		mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
		kvm_set_memory_region(kvm, &mem);

		mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
		kvm_set_memory_region(kvm, &mem);

		mem.slot = TSS_PRIVATE_MEMSLOT;
		kvm_set_memory_region(kvm, &mem);
	}
	kvm_iommu_unmap_guest(kvm);
	kfree(kvm->arch.vpic);
	kfree(kvm->arch.vioapic);
	kvm_free_vcpus(kvm);
	if (kvm->arch.apic_access_page)
		put_page(kvm->arch.apic_access_page);
	if (kvm->arch.ept_identity_pagetable)
		put_page(kvm->arch.ept_identity_pagetable);
	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}

void kvm_arch_free_memslot(struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
			kvm_kvfree(free->arch.rmap[i]);
			free->arch.rmap[i] = NULL;
		}
		if (i == 0)
			continue;

		if (!dont || free->arch.lpage_info[i - 1] !=
			     dont->arch.lpage_info[i - 1]) {
			kvm_kvfree(free->arch.lpage_info[i - 1]);
			free->arch.lpage_info[i - 1] = NULL;
		}
	}
}

int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		unsigned long ugfn;
		int lpages;
		int level = i + 1;

		lpages = gfn_to_index(slot->base_gfn + npages - 1,
				      slot->base_gfn, level) + 1;

		slot->arch.rmap[i] =
			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
		if (!slot->arch.rmap[i])
			goto out_free;
		if (i == 0)
			continue;

		slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
				sizeof(*slot->arch.lpage_info[i - 1]));
		if (!slot->arch.lpage_info[i - 1])
			goto out_free;

		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
			slot->arch.lpage_info[i - 1][0].write_count = 1;
		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
			slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
		ugfn = slot->userspace_addr >> PAGE_SHIFT;

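		/*
		 * If the gfn and userspace address are not aligned wrt
		 * each other, or if large pages are globally disabled,
		 * disable large page support for this slot.
		 */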
		if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
		    !kvm_largepages_enabled()) {
			unsigned long j;

			for (j = 0; j < lpages; ++j)
				slot->arch.lpage_info[i - 1][j].write_count = 1;
		}
	}

	return 0;

out_free:
	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		kvm_kvfree(slot->arch.rmap[i]);
		slot->arch.rmap[i] = NULL;
		if (i == 0)
			continue;

		kvm_kvfree(slot->arch.lpage_info[i - 1]);
		slot->arch.lpage_info[i - 1] = NULL;
	}
	return -ENOMEM;
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
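	/*
	 * Only private memory slots (id >= KVM_USER_MEM_SLOTS) need to
	 * be mapped here: they are created by the kernel itself, so
	 * userspace never supplies a backing mapping for them.
	 */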
	if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
		unsigned long userspace_addr;

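		/*
		 * MAP_SHARED to prevent internal slot pages from being
		 * moved by fork()/COW.
		 */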
		userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
					 PROT_READ | PROT_WRITE,
					 MAP_SHARED | MAP_ANONYMOUS, 0);

		if (IS_ERR((void *)userspace_addr))
			return PTR_ERR((void *)userspace_addr);

		memslot->userspace_addr = userspace_addr;
	}

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   enum kvm_mr_change change)
{
	int nr_mmu_pages = 0;

	if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
		int ret;

		ret = vm_munmap(old->userspace_addr,
				old->npages * PAGE_SIZE);
		if (ret < 0)
			printk(KERN_WARNING
			       "kvm_vm_ioctl_set_memory_region: "
			       "failed to munmap memory\n");
	}

	if (!kvm->arch.n_requested_mmu_pages)
		nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);

	if (nr_mmu_pages)
		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
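	/*
	 * Write protect all pages for dirty logging.  Existing large
	 * page mappings are destroyed here, and new ones will not be
	 * created until dirty logging ends.
	 */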
	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
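	/*
	 * If a memslot was created or moved, stale mmio sptes may still
	 * reference it; zap them all.
	 */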
	kvm_mmu_invalidate_mmio_sptes(kvm);
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_mmu_invalidate_zap_all_pages(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	kvm_mmu_invalidate_zap_all_pages(kvm);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
		!vcpu->arch.apf.halted)
		|| !list_empty_careful(&vcpu->async_pf.done)
		|| kvm_apic_has_events(vcpu)
		|| atomic_read(&vcpu->arch.nmi_queued)
		|| (kvm_arch_interrupt_allowed(vcpu) &&
		    kvm_cpu_has_interrupt(vcpu));
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops->interrupt_allowed(vcpu);
}

bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
	unsigned long current_rip = kvm_rip_read(vcpu) +
		get_segment_base(vcpu, VCPU_SREG_CS);

	return current_rip == linear_rip;
}
EXPORT_SYMBOL_GPL(kvm_is_linear_rip);

unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
	unsigned long rflags;

	rflags = kvm_x86_ops->get_rflags(vcpu);
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		rflags &= ~X86_EFLAGS_TF;
	return rflags;
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);

void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
		rflags |= X86_EFLAGS_TF;
	kvm_x86_ops->set_rflags(vcpu, rflags);
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
	int r;

	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
	    is_error_page(work->page))
		return;

	r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		return;

	if (!vcpu->arch.mmu.direct_map &&
	    work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
		return;

	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
}

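/*
 * The async page fault gfn table is a small open-addressed hash table
 * (roundup_pow_of_two(ASYNC_PF_PER_VCPU) slots) with linear probing;
 * ~0 marks an empty slot.
 */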
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}

static inline u32 kvm_async_pf_next_probe(u32 key)
{
	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}

static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 key = kvm_async_pf_hash_fn(gfn);

	while (vcpu->arch.apf.gfns[key] != ~0)
		key = kvm_async_pf_next_probe(key);

	vcpu->arch.apf.gfns[key] = gfn;
}

static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	int i;
	u32 key = kvm_async_pf_hash_fn(gfn);

	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
		     (vcpu->arch.apf.gfns[key] != gfn &&
		      vcpu->arch.apf.gfns[key] != ~0); i++)
		key = kvm_async_pf_next_probe(key);

	return key;
}

bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}

static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 i, j, k;

	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
	while (true) {
		vcpu->arch.apf.gfns[i] = ~0;
		do {
			j = kvm_async_pf_next_probe(j);
			if (vcpu->arch.apf.gfns[j] == ~0)
				return;
			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
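			/*
			 * k is the natural slot of the entry at j; keep
			 * probing while k lies cyclically in (i, j],
			 * i.e. while that entry cannot be moved back
			 * into the hole at i without breaking later
			 * lookups.
			 */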
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
		i = j;
	}
}

static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
				      sizeof(val));
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);

	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
	else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;
		fault.nested_page_fault = false;
		fault.address = work->arch.token;
		kvm_inject_page_fault(vcpu, &fault);
	}
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_ready(work->arch.token, work->gva);
	if (is_error_page(work->page))
		work->arch.token = ~0; /* broadcast wakeup */
	else
		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);

	if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;
		fault.nested_page_fault = false;
		fault.address = work->arch.token;
		kvm_inject_page_fault(vcpu, &fault);
	}
	vcpu->arch.apf.halted = false;
	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
		return true;
	else
		return !kvm_event_needs_reinjection(vcpu) &&
			kvm_x86_ops->interrupt_allowed(vcpu);
}

EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);