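/*
 * Kernel-based Virtual Machine (KVM) driver for Linux: x86-specific code.
 *
 * This portion covers MSR emulation, control- and debug-register handling,
 * exception queueing, and kvmclock/TSC management for the x86 vcpu ioctls.
 */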
22#include <linux/kvm_host.h>
23#include "irq.h"
24#include "mmu.h"
25#include "i8254.h"
26#include "tss.h"
27#include "kvm_cache_regs.h"
28#include "x86.h"
29#include "cpuid.h"
30
31#include <linux/clocksource.h>
32#include <linux/interrupt.h>
33#include <linux/kvm.h>
34#include <linux/fs.h>
35#include <linux/vmalloc.h>
36#include <linux/module.h>
37#include <linux/mman.h>
38#include <linux/highmem.h>
39#include <linux/iommu.h>
40#include <linux/intel-iommu.h>
41#include <linux/cpufreq.h>
42#include <linux/user-return-notifier.h>
43#include <linux/srcu.h>
44#include <linux/slab.h>
45#include <linux/perf_event.h>
46#include <linux/uaccess.h>
47#include <linux/hash.h>
48#include <linux/pci.h>
49#include <linux/timekeeper_internal.h>
50#include <linux/pvclock_gtod.h>
51#include <trace/events/kvm.h>
52
53#define CREATE_TRACE_POINTS
54#include "trace.h"
55
56#include <asm/debugreg.h>
57#include <asm/msr.h>
58#include <asm/desc.h>
59#include <asm/mtrr.h>
60#include <asm/mce.h>
61#include <asm/i387.h>
62#include <asm/fpu-internal.h>
63#include <asm/xcr.h>
64#include <asm/pvclock.h>
65#include <asm/div64.h>
66
67#define MAX_IO_MSRS 256
68#define KVM_MAX_MCE_BANKS 32
69#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
70
71#define emul_to_vcpu(ctxt) \
72 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
73
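/*
 * EFER bits a guest may set: SCE everywhere, plus LME/LMA on 64-bit hosts.
 * Everything else is treated as reserved unless a vendor module later
 * clears it via kvm_enable_efer_bits().
 */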
78#ifdef CONFIG_X86_64
79static
80u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
81#else
82static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
83#endif
84
85#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
86#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
87
88static void update_cr8_intercept(struct kvm_vcpu *vcpu);
89static void process_nmi(struct kvm_vcpu *vcpu);
90
91struct kvm_x86_ops *kvm_x86_ops;
92EXPORT_SYMBOL_GPL(kvm_x86_ops);
93
static bool ignore_msrs = false;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
96
97bool kvm_has_tsc_control;
98EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
99u32 kvm_max_guest_tsc_khz;
100EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
101
102
103static u32 tsc_tolerance_ppm = 250;
104module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
105
106#define KVM_NR_SHARED_MSRS 16
107
108struct kvm_shared_msrs_global {
109 int nr;
110 u32 msrs[KVM_NR_SHARED_MSRS];
111};
112
113struct kvm_shared_msrs {
114 struct user_return_notifier urn;
115 bool registered;
116 struct kvm_shared_msr_values {
117 u64 host;
118 u64 curr;
119 } values[KVM_NR_SHARED_MSRS];
120};
121
122static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
123static struct kvm_shared_msrs __percpu *shared_msrs;
124
125struct kvm_stats_debugfs_item debugfs_entries[] = {
126 { "pf_fixed", VCPU_STAT(pf_fixed) },
127 { "pf_guest", VCPU_STAT(pf_guest) },
128 { "tlb_flush", VCPU_STAT(tlb_flush) },
129 { "invlpg", VCPU_STAT(invlpg) },
130 { "exits", VCPU_STAT(exits) },
131 { "io_exits", VCPU_STAT(io_exits) },
132 { "mmio_exits", VCPU_STAT(mmio_exits) },
133 { "signal_exits", VCPU_STAT(signal_exits) },
134 { "irq_window", VCPU_STAT(irq_window_exits) },
135 { "nmi_window", VCPU_STAT(nmi_window_exits) },
136 { "halt_exits", VCPU_STAT(halt_exits) },
137 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
138 { "hypercalls", VCPU_STAT(hypercalls) },
139 { "request_irq", VCPU_STAT(request_irq_exits) },
140 { "irq_exits", VCPU_STAT(irq_exits) },
141 { "host_state_reload", VCPU_STAT(host_state_reload) },
142 { "efer_reload", VCPU_STAT(efer_reload) },
143 { "fpu_reload", VCPU_STAT(fpu_reload) },
144 { "insn_emulation", VCPU_STAT(insn_emulation) },
145 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
146 { "irq_injections", VCPU_STAT(irq_injections) },
147 { "nmi_injections", VCPU_STAT(nmi_injections) },
148 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
149 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
150 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
151 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
152 { "mmu_flooded", VM_STAT(mmu_flooded) },
153 { "mmu_recycled", VM_STAT(mmu_recycled) },
154 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
155 { "mmu_unsync", VM_STAT(mmu_unsync) },
156 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
157 { "largepages", VM_STAT(lpages) },
158 { NULL }
159};
160
161u64 __read_mostly host_xcr0;
162
163static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
164
165static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
166{
167 int i;
168 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
169 vcpu->arch.apf.gfns[i] = ~0;
170}
171
172static void kvm_on_user_return(struct user_return_notifier *urn)
173{
174 unsigned slot;
175 struct kvm_shared_msrs *locals
176 = container_of(urn, struct kvm_shared_msrs, urn);
177 struct kvm_shared_msr_values *values;
178
179 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
180 values = &locals->values[slot];
181 if (values->host != values->curr) {
182 wrmsrl(shared_msrs_global.msrs[slot], values->host);
183 values->curr = values->host;
184 }
185 }
186 locals->registered = false;
187 user_return_notifier_unregister(urn);
188}
189
190static void shared_msr_update(unsigned slot, u32 msr)
191{
192 u64 value;
193 unsigned int cpu = smp_processor_id();
194 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
195
196
197
198 if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!\n");
200 return;
201 }
202 rdmsrl_safe(msr, &value);
203 smsr->values[slot].host = value;
204 smsr->values[slot].curr = value;
205}
206
207void kvm_define_shared_msr(unsigned slot, u32 msr)
208{
209 if (slot >= shared_msrs_global.nr)
210 shared_msrs_global.nr = slot + 1;
211 shared_msrs_global.msrs[slot] = msr;
212
213 smp_wmb();
214}
215EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
216
217static void kvm_shared_msr_cpu_online(void)
218{
219 unsigned i;
220
221 for (i = 0; i < shared_msrs_global.nr; ++i)
222 shared_msr_update(i, shared_msrs_global.msrs[i]);
223}
224
225void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
226{
227 unsigned int cpu = smp_processor_id();
228 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
229
230 if (((value ^ smsr->values[slot].curr) & mask) == 0)
231 return;
232 smsr->values[slot].curr = value;
233 wrmsrl(shared_msrs_global.msrs[slot], value);
234 if (!smsr->registered) {
235 smsr->urn.on_user_return = kvm_on_user_return;
236 user_return_notifier_register(&smsr->urn);
237 smsr->registered = true;
238 }
239}
240EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
241
242static void drop_user_return_notifiers(void *ignore)
243{
244 unsigned int cpu = smp_processor_id();
245 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
246
247 if (smsr->registered)
248 kvm_on_user_return(&smsr->urn);
249}
250
251u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
252{
253 return vcpu->arch.apic_base;
254}
255EXPORT_SYMBOL_GPL(kvm_get_apic_base);
256
257void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
258{
259
260 kvm_lapic_set_base(vcpu, data);
261}
262EXPORT_SYMBOL_GPL(kvm_set_apic_base);
263
264asmlinkage void kvm_spurious_fault(void)
265{
266
267 BUG();
268}
269EXPORT_SYMBOL_GPL(kvm_spurious_fault);
270
271#define EXCPT_BENIGN 0
272#define EXCPT_CONTRIBUTORY 1
273#define EXCPT_PF 2
274
275static int exception_class(int vector)
276{
277 switch (vector) {
278 case PF_VECTOR:
279 return EXCPT_PF;
280 case DE_VECTOR:
281 case TS_VECTOR:
282 case NP_VECTOR:
283 case SS_VECTOR:
284 case GP_VECTOR:
285 return EXCPT_CONTRIBUTORY;
286 default:
287 break;
288 }
289 return EXCPT_BENIGN;
290}
291
292static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
293 unsigned nr, bool has_error, u32 error_code,
294 bool reinject)
295{
296 u32 prev_nr;
297 int class1, class2;
298
299 kvm_make_request(KVM_REQ_EVENT, vcpu);
300
301 if (!vcpu->arch.exception.pending) {
302 queue:
303 vcpu->arch.exception.pending = true;
304 vcpu->arch.exception.has_error_code = has_error;
305 vcpu->arch.exception.nr = nr;
306 vcpu->arch.exception.error_code = error_code;
307 vcpu->arch.exception.reinject = reinject;
308 return;
309 }
310
311
312 prev_nr = vcpu->arch.exception.nr;
313 if (prev_nr == DF_VECTOR) {
		/* A fault while delivering #DF means triple fault. */
315 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
316 return;
317 }
318 class1 = exception_class(prev_nr);
319 class2 = exception_class(nr);
320 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
321 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* Contributory-on-contributory, or fault-on-#PF: escalate to #DF. */
323 vcpu->arch.exception.pending = true;
324 vcpu->arch.exception.has_error_code = true;
325 vcpu->arch.exception.nr = DF_VECTOR;
326 vcpu->arch.exception.error_code = 0;
327 } else
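		/*
		 * Drop the pending exception and queue the new one instead;
		 * re-executing the faulting instruction is expected to
		 * regenerate the one that was dropped.
		 */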
331 goto queue;
332}
333
334void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
335{
336 kvm_multiple_exception(vcpu, nr, false, 0, false);
337}
338EXPORT_SYMBOL_GPL(kvm_queue_exception);
339
340void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
341{
342 kvm_multiple_exception(vcpu, nr, false, 0, true);
343}
344EXPORT_SYMBOL_GPL(kvm_requeue_exception);
345
346void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
347{
348 if (err)
349 kvm_inject_gp(vcpu, 0);
350 else
351 kvm_x86_ops->skip_emulated_instruction(vcpu);
352}
353EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
354
355void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
356{
357 ++vcpu->stat.pf_guest;
358 vcpu->arch.cr2 = fault->address;
359 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
360}
361EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
362
363void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
364{
365 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
366 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
367 else
368 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
369}
370
371void kvm_inject_nmi(struct kvm_vcpu *vcpu)
372{
373 atomic_inc(&vcpu->arch.nmi_queued);
374 kvm_make_request(KVM_REQ_NMI, vcpu);
375}
376EXPORT_SYMBOL_GPL(kvm_inject_nmi);
377
378void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
379{
380 kvm_multiple_exception(vcpu, nr, true, error_code, false);
381}
382EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
383
384void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
385{
386 kvm_multiple_exception(vcpu, nr, true, error_code, true);
387}
388EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
389
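/*
 * Check that the current privilege level is no higher (numerically) than
 * @required_cpl; otherwise inject #GP(0) and return false.
 */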
394bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
395{
396 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
397 return true;
398 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
399 return false;
400}
401EXPORT_SYMBOL_GPL(kvm_require_cpl);
402
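/*
 * Read guest memory through a specific MMU context: @ngfn is first
 * translated with mmu->translate_gpa() (e.g. nested guest physical to
 * guest physical) and the resulting frame is then read.
 */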
408int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
409 gfn_t ngfn, void *data, int offset, int len,
410 u32 access)
411{
412 gfn_t real_gfn;
413 gpa_t ngpa;
414
415 ngpa = gfn_to_gpa(ngfn);
416 real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
417 if (real_gfn == UNMAPPED_GVA)
418 return -EFAULT;
419
420 real_gfn = gpa_to_gfn(real_gfn);
421
422 return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
423}
424EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
425
426int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
427 void *data, int offset, int len, u32 access)
428{
429 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
430 data, offset, len, access);
431}
432
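/*
 * Load the four PAE PDPTEs referenced by CR3 into mmu->pdptrs.
 * Returns 1 if every present entry is valid (no reserved bits set),
 * 0 otherwise.
 */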
436int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
437{
438 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
439 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
440 int i;
441 int ret;
442 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
443
444 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
445 offset * sizeof(u64), sizeof(pdpte),
446 PFERR_USER_MASK|PFERR_WRITE_MASK);
447 if (ret < 0) {
448 ret = 0;
449 goto out;
450 }
451 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
452 if (is_present_gpte(pdpte[i]) &&
453 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
454 ret = 0;
455 goto out;
456 }
457 }
458 ret = 1;
459
460 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
461 __set_bit(VCPU_EXREG_PDPTR,
462 (unsigned long *)&vcpu->arch.regs_avail);
463 __set_bit(VCPU_EXREG_PDPTR,
464 (unsigned long *)&vcpu->arch.regs_dirty);
465out:
466
467 return ret;
468}
469EXPORT_SYMBOL_GPL(load_pdptrs);
470
471static bool pdptrs_changed(struct kvm_vcpu *vcpu)
472{
473 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
474 bool changed = true;
475 int offset;
476 gfn_t gfn;
477 int r;
478
479 if (is_long_mode(vcpu) || !is_pae(vcpu))
480 return false;
481
482 if (!test_bit(VCPU_EXREG_PDPTR,
483 (unsigned long *)&vcpu->arch.regs_avail))
484 return true;
485
486 gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
487 offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
488 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
489 PFERR_USER_MASK | PFERR_WRITE_MASK);
490 if (r < 0)
491 goto out;
492 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
493out:
494
495 return changed;
496}
497
498int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
499{
500 unsigned long old_cr0 = kvm_read_cr0(vcpu);
501 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
502 X86_CR0_CD | X86_CR0_NW;
503
504 cr0 |= X86_CR0_ET;
505
506#ifdef CONFIG_X86_64
507 if (cr0 & 0xffffffff00000000UL)
508 return 1;
509#endif
510
511 cr0 &= ~CR0_RESERVED_BITS;
512
513 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
514 return 1;
515
516 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
517 return 1;
518
519 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
520#ifdef CONFIG_X86_64
521 if ((vcpu->arch.efer & EFER_LME)) {
522 int cs_db, cs_l;
523
524 if (!is_pae(vcpu))
525 return 1;
526 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
527 if (cs_l)
528 return 1;
529 } else
530#endif
531 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
532 kvm_read_cr3(vcpu)))
533 return 1;
534 }
535
536 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
537 return 1;
538
539 kvm_x86_ops->set_cr0(vcpu, cr0);
540
541 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
542 kvm_clear_async_pf_completion_queue(vcpu);
543 kvm_async_pf_hash_reset(vcpu);
544 }
545
546 if ((cr0 ^ old_cr0) & update_bits)
547 kvm_mmu_reset_context(vcpu);
548 return 0;
549}
550EXPORT_SYMBOL_GPL(kvm_set_cr0);
551
552void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
553{
554 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
555}
556EXPORT_SYMBOL_GPL(kvm_lmsw);
557
558static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
559{
560 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
561 !vcpu->guest_xcr0_loaded) {
562
563 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
564 vcpu->guest_xcr0_loaded = 1;
565 }
566}
567
568static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
569{
570 if (vcpu->guest_xcr0_loaded) {
571 if (vcpu->arch.xcr0 != host_xcr0)
572 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
573 vcpu->guest_xcr0_loaded = 0;
574 }
575}
576
577int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
578{
579 u64 xcr0;
580
581
582 if (index != XCR_XFEATURE_ENABLED_MASK)
583 return 1;
584 xcr0 = xcr;
585 if (!(xcr0 & XSTATE_FP))
586 return 1;
587 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
588 return 1;
589 if (xcr0 & ~host_xcr0)
590 return 1;
591 kvm_put_guest_xcr0(vcpu);
592 vcpu->arch.xcr0 = xcr0;
593 return 0;
594}
595
596int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
597{
598 if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
599 __kvm_set_xcr(vcpu, index, xcr)) {
600 kvm_inject_gp(vcpu, 0);
601 return 1;
602 }
603 return 0;
604}
605EXPORT_SYMBOL_GPL(kvm_set_xcr);
606
607int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
608{
609 unsigned long old_cr4 = kvm_read_cr4(vcpu);
610 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
611 X86_CR4_PAE | X86_CR4_SMEP;
612 if (cr4 & CR4_RESERVED_BITS)
613 return 1;
614
615 if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
616 return 1;
617
618 if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
619 return 1;
620
621 if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
622 return 1;
623
624 if (is_long_mode(vcpu)) {
625 if (!(cr4 & X86_CR4_PAE))
626 return 1;
627 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
628 && ((cr4 ^ old_cr4) & pdptr_bits)
629 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
630 kvm_read_cr3(vcpu)))
631 return 1;
632
633 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
634 if (!guest_cpuid_has_pcid(vcpu))
635 return 1;
636
637
638 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
639 return 1;
640 }
641
642 if (kvm_x86_ops->set_cr4(vcpu, cr4))
643 return 1;
644
645 if (((cr4 ^ old_cr4) & pdptr_bits) ||
646 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
647 kvm_mmu_reset_context(vcpu);
648
649 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
650 kvm_update_cpuid(vcpu);
651
652 return 0;
653}
654EXPORT_SYMBOL_GPL(kvm_set_cr4);
655
656int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
657{
658 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
659 kvm_mmu_sync_roots(vcpu);
660 kvm_mmu_flush_tlb(vcpu);
661 return 0;
662 }
663
664 if (is_long_mode(vcpu)) {
665 if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
666 if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
667 return 1;
668 } else
669 if (cr3 & CR3_L_MODE_RESERVED_BITS)
670 return 1;
671 } else {
672 if (is_pae(vcpu)) {
673 if (cr3 & CR3_PAE_RESERVED_BITS)
674 return 1;
675 if (is_paging(vcpu) &&
676 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
677 return 1;
678 }
679
683 }
684
685 vcpu->arch.cr3 = cr3;
686 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
687 vcpu->arch.mmu.new_cr3(vcpu);
688 return 0;
689}
690EXPORT_SYMBOL_GPL(kvm_set_cr3);
691
692int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
693{
694 if (cr8 & CR8_RESERVED_BITS)
695 return 1;
696 if (irqchip_in_kernel(vcpu->kvm))
697 kvm_lapic_set_tpr(vcpu, cr8);
698 else
699 vcpu->arch.cr8 = cr8;
700 return 0;
701}
702EXPORT_SYMBOL_GPL(kvm_set_cr8);
703
704unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
705{
706 if (irqchip_in_kernel(vcpu->kvm))
707 return kvm_lapic_get_cr8(vcpu);
708 else
709 return vcpu->arch.cr8;
710}
711EXPORT_SYMBOL_GPL(kvm_get_cr8);
712
713static void kvm_update_dr7(struct kvm_vcpu *vcpu)
714{
715 unsigned long dr7;
716
717 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
718 dr7 = vcpu->arch.guest_debug_dr7;
719 else
720 dr7 = vcpu->arch.dr7;
721 kvm_x86_ops->set_dr7(vcpu, dr7);
722 vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
723}
724
725static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
726{
727 switch (dr) {
728 case 0 ... 3:
729 vcpu->arch.db[dr] = val;
730 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
731 vcpu->arch.eff_db[dr] = val;
732 break;
733 case 4:
734 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
735 return 1;
		/* fall through: DR4 aliases DR6 when CR4.DE is clear */
737 case 6:
738 if (val & 0xffffffff00000000ULL)
739 return -1;
740 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
741 break;
742 case 5:
743 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
744 return 1;
		/* fall through: DR5 aliases DR7 when CR4.DE is clear */
746 default:
747 if (val & 0xffffffff00000000ULL)
748 return -1;
749 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
750 kvm_update_dr7(vcpu);
751 break;
752 }
753
754 return 0;
755}
756
757int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
758{
759 int res;
760
761 res = __kvm_set_dr(vcpu, dr, val);
762 if (res > 0)
763 kvm_queue_exception(vcpu, UD_VECTOR);
764 else if (res < 0)
765 kvm_inject_gp(vcpu, 0);
766
767 return res;
768}
769EXPORT_SYMBOL_GPL(kvm_set_dr);
770
771static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
772{
773 switch (dr) {
774 case 0 ... 3:
775 *val = vcpu->arch.db[dr];
776 break;
777 case 4:
778 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
779 return 1;
		/* fall through: DR4 aliases DR6 when CR4.DE is clear */
781 case 6:
782 *val = vcpu->arch.dr6;
783 break;
784 case 5:
785 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
786 return 1;
		/* fall through: DR5 aliases DR7 when CR4.DE is clear */
788 default:
789 *val = vcpu->arch.dr7;
790 break;
791 }
792
793 return 0;
794}
795
796int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
797{
798 if (_kvm_get_dr(vcpu, dr, val)) {
799 kvm_queue_exception(vcpu, UD_VECTOR);
800 return 1;
801 }
802 return 0;
803}
804EXPORT_SYMBOL_GPL(kvm_get_dr);
805
806bool kvm_rdpmc(struct kvm_vcpu *vcpu)
807{
808 u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
809 u64 data;
810 int err;
811
812 err = kvm_pmu_read_pmc(vcpu, ecx, &data);
813 if (err)
814 return err;
815 kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
816 kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
817 return err;
818}
819EXPORT_SYMBOL_GPL(kvm_rdpmc);
820
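/*
 * MSR numbers exposed to userspace via KVM_GET_MSR_INDEX_LIST.  The first
 * KVM_SAVE_MSRS_BEGIN entries are KVM/Hyper-V paravirtual MSRs that are
 * always reported; the remainder are filtered at init time against what
 * the host CPU actually supports.
 */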
830#define KVM_SAVE_MSRS_BEGIN 10
831static u32 msrs_to_save[] = {
832 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
833 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
834 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
835 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
836 MSR_KVM_PV_EOI_EN,
837 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
838 MSR_STAR,
839#ifdef CONFIG_X86_64
840 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
841#endif
842 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
843 MSR_IA32_FEATURE_CONTROL
844};
845
846static unsigned num_msrs_to_save;
847
848static const u32 emulated_msrs[] = {
849 MSR_IA32_TSC_ADJUST,
850 MSR_IA32_TSCDEADLINE,
851 MSR_IA32_MISC_ENABLE,
852 MSR_IA32_MCG_STATUS,
853 MSR_IA32_MCG_CTL,
854};
855
856bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
857{
858 if (efer & efer_reserved_bits)
859 return false;
860
861 if (efer & EFER_FFXSR) {
862 struct kvm_cpuid_entry2 *feat;
863
864 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
865 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
866 return false;
867 }
868
869 if (efer & EFER_SVME) {
870 struct kvm_cpuid_entry2 *feat;
871
872 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
873 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
874 return false;
875 }
876
877 return true;
878}
879EXPORT_SYMBOL_GPL(kvm_valid_efer);
880
881static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
882{
883 u64 old_efer = vcpu->arch.efer;
884
885 if (!kvm_valid_efer(vcpu, efer))
886 return 1;
887
888 if (is_paging(vcpu)
889 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
890 return 1;
891
892 efer &= ~EFER_LMA;
893 efer |= vcpu->arch.efer & EFER_LMA;
894
895 kvm_x86_ops->set_efer(vcpu, efer);
896
897
898 if ((efer ^ old_efer) & EFER_NX)
899 kvm_mmu_reset_context(vcpu);
900
901 return 0;
902}
903
904void kvm_enable_efer_bits(u64 mask)
905{
906 efer_reserved_bits &= ~mask;
907}
908EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
909
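/*
 * Write the value in @msr into the corresponding guest MSR/"register".
 * Returns 0 on success, non-zero otherwise.  Assumes vcpu_load() was
 * already called.
 */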
916int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
917{
918 return kvm_x86_ops->set_msr(vcpu, msr);
919}
920
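/* Adapt kvm_set_msr() to the do_msr callback shape used by __msr_io(). */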
924static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
925{
926 struct msr_data msr;
927
928 msr.data = *data;
929 msr.index = index;
930 msr.host_initiated = true;
931 return kvm_set_msr(vcpu, &msr);
932}
933
934#ifdef CONFIG_X86_64
935struct pvclock_gtod_data {
936 seqcount_t seq;
937
938 struct {
939 int vclock_mode;
940 cycle_t cycle_last;
941 cycle_t mask;
942 u32 mult;
943 u32 shift;
944 } clock;
945
946
947 u64 monotonic_time_snsec;
948 time_t monotonic_time_sec;
949};
950
951static struct pvclock_gtod_data pvclock_gtod_data;
952
953static void update_pvclock_gtod(struct timekeeper *tk)
954{
955 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
956
957 write_seqcount_begin(&vdata->seq);
958
959
960 vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
961 vdata->clock.cycle_last = tk->clock->cycle_last;
962 vdata->clock.mask = tk->clock->mask;
963 vdata->clock.mult = tk->mult;
964 vdata->clock.shift = tk->shift;
965
966 vdata->monotonic_time_sec = tk->xtime_sec
967 + tk->wall_to_monotonic.tv_sec;
968 vdata->monotonic_time_snsec = tk->xtime_nsec
969 + (tk->wall_to_monotonic.tv_nsec
970 << tk->shift);
971 while (vdata->monotonic_time_snsec >=
972 (((u64)NSEC_PER_SEC) << tk->shift)) {
973 vdata->monotonic_time_snsec -=
974 ((u64)NSEC_PER_SEC) << tk->shift;
975 vdata->monotonic_time_sec++;
976 }
977
978 write_seqcount_end(&vdata->seq);
979}
980#endif
981
982
983static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
984{
985 int version;
986 int r;
987 struct pvclock_wall_clock wc;
988 struct timespec boot;
989
990 if (!wall_clock)
991 return;
992
993 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
994 if (r)
995 return;
996
997 if (version & 1)
998 ++version;
999
1000 ++version;
1001
1002 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1003
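	/*
	 * The guest derives wall-clock time by adding its kvmclock system
	 * time (time since boot) to the value written here, so publish the
	 * host boot time, adjusted by any kvmclock_offset of this VM.
	 */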
1010 getboottime(&boot);
1011
1012 if (kvm->arch.kvmclock_offset) {
1013 struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
1014 boot = timespec_sub(boot, ts);
1015 }
1016 wc.sec = boot.tv_sec;
1017 wc.nsec = boot.tv_nsec;
1018 wc.version = version;
1019
1020 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1021
1022 version++;
1023 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1024}
1025
1026static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1027{
1028 uint32_t quotient, remainder;
1029
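	/*
	 * Computes (dividend << 32) / divisor, i.e. the quotient as a
	 * 32-bit fixed-point fraction; do_div() is not equivalent here.
	 */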
1032 __asm__ ( "divl %4"
1033 : "=a" (quotient), "=d" (remainder)
1034 : "0" (0), "1" (dividend), "r" (divisor) );
1035 return quotient;
1036}
1037
1038static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
1039 s8 *pshift, u32 *pmultiplier)
1040{
1041 uint64_t scaled64;
1042 int32_t shift = 0;
1043 uint64_t tps64;
1044 uint32_t tps32;
1045
1046 tps64 = base_khz * 1000LL;
1047 scaled64 = scaled_khz * 1000LL;
1048 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1049 tps64 >>= 1;
1050 shift--;
1051 }
1052
1053 tps32 = (uint32_t)tps64;
1054 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
1055 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
1056 scaled64 >>= 1;
1057 else
1058 tps32 <<= 1;
1059 shift++;
1060 }
1061
1062 *pshift = shift;
1063 *pmultiplier = div_frac(scaled64, tps32);
1064
1065 pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
1066 __func__, base_khz, scaled_khz, shift, *pmultiplier);
1067}
1068
1069static inline u64 get_kernel_ns(void)
1070{
1071 struct timespec ts;
1072
1073 WARN_ON(preemptible());
1074 ktime_get_ts(&ts);
1075 monotonic_to_bootbased(&ts);
1076 return timespec_to_ns(&ts);
1077}
1078
1079#ifdef CONFIG_X86_64
1080static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1081#endif
1082
1083static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1084unsigned long max_tsc_khz;
1085
1086static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
1087{
1088 return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
1089 vcpu->arch.virtual_tsc_shift);
1090}
1091
1092static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1093{
1094 u64 v = (u64)khz * (1000000 + ppm);
1095 do_div(v, 1000000);
1096 return v;
1097}
1098
1099static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1100{
1101 u32 thresh_lo, thresh_hi;
1102 int use_scaling = 0;
1103
1104
1105 if (this_tsc_khz == 0)
1106 return;
1107
1108
1109 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1110 &vcpu->arch.virtual_tsc_shift,
1111 &vcpu->arch.virtual_tsc_mult);
1112 vcpu->arch.virtual_tsc_khz = this_tsc_khz;
1113
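	/*
	 * Decide whether the requested guest TSC frequency is within
	 * tsc_tolerance_ppm of the host frequency; if not, hardware TSC
	 * scaling (or software catchup) is required.
	 */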
1120 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1121 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1122 if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
1123 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
1124 use_scaling = 1;
1125 }
1126 kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
1127}
1128
1129static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1130{
1131 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1132 vcpu->arch.virtual_tsc_mult,
1133 vcpu->arch.virtual_tsc_shift);
1134 tsc += vcpu->arch.this_tsc_write;
1135 return tsc;
1136}
1137
1138void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1139{
1140#ifdef CONFIG_X86_64
1141 bool vcpus_matched;
1142 bool do_request = false;
1143 struct kvm_arch *ka = &vcpu->kvm->arch;
1144 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1145
1146 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1147 atomic_read(&vcpu->kvm->online_vcpus));
1148
1149 if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
1150 if (!ka->use_master_clock)
1151 do_request = 1;
1152
1153 if (!vcpus_matched && ka->use_master_clock)
1154 do_request = 1;
1155
1156 if (do_request)
1157 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1158
1159 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1160 atomic_read(&vcpu->kvm->online_vcpus),
1161 ka->use_master_clock, gtod->clock.vclock_mode);
1162#endif
1163}
1164
1165static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1166{
1167 u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
1168 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1169}
1170
1171void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1172{
1173 struct kvm *kvm = vcpu->kvm;
1174 u64 offset, ns, elapsed;
1175 unsigned long flags;
1176 s64 usdiff;
1177 bool matched;
1178 u64 data = msr->data;
1179
1180 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1181 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1182 ns = get_kernel_ns();
1183 elapsed = ns - kvm->arch.last_tsc_nsec;
1184
1185 if (vcpu->arch.virtual_tsc_khz) {
1186 int faulted = 0;
1187
1188
1189 usdiff = data - kvm->arch.last_tsc_write;
1190#ifdef CONFIG_X86_64
1191 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
1192#else
1193
1194 asm("1: idivl %[divisor]\n"
1195 "2: xor %%edx, %%edx\n"
1196 " movl $0, %[faulted]\n"
1197 "3:\n"
1198 ".section .fixup,\"ax\"\n"
1199 "4: movl $1, %[faulted]\n"
1200 " jmp 3b\n"
1201 ".previous\n"
1202
1203 _ASM_EXTABLE(1b, 4b)
1204
1205 : "=A"(usdiff), [faulted] "=r" (faulted)
1206 : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
1207
1208#endif
1209 do_div(elapsed, 1000);
1210 usdiff -= elapsed;
1211 if (usdiff < 0)
1212 usdiff = -usdiff;
1213
1214
1215 if (faulted)
1216 usdiff = USEC_PER_SEC;
1217 } else
1218 usdiff = USEC_PER_SEC;
1219
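	/*
	 * A write that lands within one second of virtual cycle time of the
	 * previous write, at the same frequency, is treated as an attempt
	 * to synchronize TSCs across vcpus: reuse the existing offset (or
	 * nudge it by the elapsed time) rather than starting a new
	 * generation.
	 */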
1230 if (usdiff < USEC_PER_SEC &&
1231 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1232 if (!check_tsc_unstable()) {
1233 offset = kvm->arch.cur_tsc_offset;
1234 pr_debug("kvm: matched tsc offset for %llu\n", data);
1235 } else {
1236 u64 delta = nsec_to_cycles(vcpu, elapsed);
1237 data += delta;
1238 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1239 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1240 }
1241 matched = true;
1242 } else {
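		/*
		 * Otherwise start a new generation of TSC writes, recording
		 * the time, value and offset that later matching vcpus will
		 * synchronize to.
		 */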
1252 kvm->arch.cur_tsc_generation++;
1253 kvm->arch.cur_tsc_nsec = ns;
1254 kvm->arch.cur_tsc_write = data;
1255 kvm->arch.cur_tsc_offset = offset;
1256 matched = false;
1257 pr_debug("kvm: new tsc generation %u, clock %llu\n",
1258 kvm->arch.cur_tsc_generation, data);
1259 }
1260
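	/*
	 * Remember the most recent write time, value and frequency so the
	 * next TSC write can be matched against this one.
	 */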
1265 kvm->arch.last_tsc_nsec = ns;
1266 kvm->arch.last_tsc_write = data;
1267 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1268
1269
1270 vcpu->arch.hv_clock.tsc_timestamp = 0;
1271 vcpu->arch.last_guest_tsc = data;
1272
1273
1274 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1275 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1276 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1277
1278 if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
1279 update_ia32_tsc_adjust_msr(vcpu, offset);
1280 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1281 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1282
1283 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1284 if (matched)
1285 kvm->arch.nr_vcpus_matched_tsc++;
1286 else
1287 kvm->arch.nr_vcpus_matched_tsc = 0;
1288
1289 kvm_track_tsc_matching(vcpu);
1290 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
1291}
1292
1293EXPORT_SYMBOL_GPL(kvm_write_tsc);
1294
1295#ifdef CONFIG_X86_64
1296
1297static cycle_t read_tsc(void)
1298{
1299 cycle_t ret;
1300 u64 last;
1301
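	/* Keep the TSC read from being reordered around surrounding loads. */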
1309 rdtsc_barrier();
1310 ret = (cycle_t)vget_cycles();
1311
1312 last = pvclock_gtod_data.clock.cycle_last;
1313
1314 if (likely(ret >= last))
1315 return ret;
1316
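	/*
	 * The TSC read came back behind the recorded cycle_last (possible on
	 * slightly unsynchronized CPUs); return the last value instead so
	 * the derived clock never goes backwards.
	 */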
1325 asm volatile ("");
1326 return last;
1327}
1328
1329static inline u64 vgettsc(cycle_t *cycle_now)
1330{
1331 long v;
1332 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1333
1334 *cycle_now = read_tsc();
1335
1336 v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
1337 return v * gtod->clock.mult;
1338}
1339
static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
{
	unsigned long seq;
	u64 ns;
	int mode;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	ts->tv_nsec = 0;
	do {
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgettsc(cycle_now);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	timespec_add_ns(ts, ns);

	return mode;
}
1360
1361
1362static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
1363{
1364 struct timespec ts;
1365
1366
1367 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
1368 return false;
1369
1370 if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
1371 return false;
1372
1373 monotonic_to_bootbased(&ts);
1374 *kernel_ns = timespec_to_ns(&ts);
1375
1376 return true;
1377}
1378#endif
1379
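/*
 * Master clock support: when the host clocksource is the TSC and all vcpus
 * have matched TSC writes, a single <master_kernel_ns, master_cycle_now>
 * pair is captured per VM and used as the base for every vcpu's kvmclock.
 * This keeps the vcpus' clocks in sync and lets the guest be told the TSC
 * is stable (PVCLOCK_TSC_STABLE_BIT).
 */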
1421static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
1422{
1423#ifdef CONFIG_X86_64
1424 struct kvm_arch *ka = &kvm->arch;
1425 int vclock_mode;
1426 bool host_tsc_clocksource, vcpus_matched;
1427
1428 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1429 atomic_read(&kvm->online_vcpus));
1430
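	/*
	 * The master clock is only usable if the host itself is running on
	 * a TSC clocksource; grab the current <ns, tsc> pair to find out.
	 */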
1435 host_tsc_clocksource = kvm_get_time_and_clockread(
1436 &ka->master_kernel_ns,
1437 &ka->master_cycle_now);
1438
1439 ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
1440
1441 if (ka->use_master_clock)
1442 atomic_set(&kvm_guest_has_master_clock, 1);
1443
1444 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
1445 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
1446 vcpus_matched);
1447#endif
1448}
1449
1450static void kvm_gen_update_masterclock(struct kvm *kvm)
1451{
1452#ifdef CONFIG_X86_64
1453 int i;
1454 struct kvm_vcpu *vcpu;
1455 struct kvm_arch *ka = &kvm->arch;
1456
1457 spin_lock(&ka->pvclock_gtod_sync_lock);
1458 kvm_make_mclock_inprogress_request(kvm);
1459
1460 pvclock_update_vm_gtod_copy(kvm);
1461
1462 kvm_for_each_vcpu(i, vcpu, kvm)
1463 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
1464
1465
1466 kvm_for_each_vcpu(i, vcpu, kvm)
1467 clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
1468
1469 spin_unlock(&ka->pvclock_gtod_sync_lock);
1470#endif
1471}
1472
1473static int kvm_guest_time_update(struct kvm_vcpu *v)
1474{
1475 unsigned long flags, this_tsc_khz;
1476 struct kvm_vcpu_arch *vcpu = &v->arch;
1477 struct kvm_arch *ka = &v->kvm->arch;
1478 s64 kernel_ns, max_kernel_ns;
1479 u64 tsc_timestamp, host_tsc;
1480 struct pvclock_vcpu_time_info guest_hv_clock;
1481 u8 pvclock_flags;
1482 bool use_master_clock;
1483
1484 kernel_ns = 0;
1485 host_tsc = 0;
1486
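	/*
	 * If the master clock is in use, take its <ns, tsc> snapshot under
	 * the pvclock lock; otherwise a local pair is sampled below.
	 */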
1491 spin_lock(&ka->pvclock_gtod_sync_lock);
1492 use_master_clock = ka->use_master_clock;
1493 if (use_master_clock) {
1494 host_tsc = ka->master_cycle_now;
1495 kernel_ns = ka->master_kernel_ns;
1496 }
1497 spin_unlock(&ka->pvclock_gtod_sync_lock);
1498
1499
1500 local_irq_save(flags);
1501 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1502 if (unlikely(this_tsc_khz == 0)) {
1503 local_irq_restore(flags);
1504 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1505 return 1;
1506 }
1507 if (!use_master_clock) {
1508 host_tsc = native_read_tsc();
1509 kernel_ns = get_kernel_ns();
1510 }
1511
1512 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
1513
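	/*
	 * In TSC catchup mode, move the TSC offset forward whenever the
	 * guest TSC would otherwise read less than the value implied by
	 * elapsed time, so the guest never sees its TSC fall behind.
	 */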
1524 if (vcpu->tsc_catchup) {
1525 u64 tsc = compute_guest_tsc(v, kernel_ns);
1526 if (tsc > tsc_timestamp) {
1527 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
1528 tsc_timestamp = tsc;
1529 }
1530 }
1531
1532 local_irq_restore(flags);
1533
1534 if (!vcpu->pv_time_enabled)
1535 return 0;
1536
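	/*
	 * Compute the largest kvmclock time the guest may already have
	 * observed (derived from its last TSC read) and never publish a
	 * smaller value, so kvmclock stays monotonic across updates.
	 */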
1558 max_kernel_ns = 0;
1559 if (vcpu->hv_clock.tsc_timestamp) {
1560 max_kernel_ns = vcpu->last_guest_tsc -
1561 vcpu->hv_clock.tsc_timestamp;
1562 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1563 vcpu->hv_clock.tsc_to_system_mul,
1564 vcpu->hv_clock.tsc_shift);
1565 max_kernel_ns += vcpu->last_kernel_ns;
1566 }
1567
1568 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1569 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1570 &vcpu->hv_clock.tsc_shift,
1571 &vcpu->hv_clock.tsc_to_system_mul);
1572 vcpu->hw_tsc_khz = this_tsc_khz;
1573 }
1574
1579 if (!use_master_clock) {
1580 if (max_kernel_ns > kernel_ns)
1581 kernel_ns = max_kernel_ns;
1582 }
1583
1584 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1585 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1586 vcpu->last_kernel_ns = kernel_ns;
1587 vcpu->last_guest_tsc = tsc_timestamp;
1588
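	/*
	 * The pvclock ABI keeps .version even while the time info is
	 * consistent and odd during updates; since the guest cannot observe
	 * the intermediate state here, bump it by 2 in one step.
	 */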
1594 vcpu->hv_clock.version += 2;
1595
1596 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1597 &guest_hv_clock, sizeof(guest_hv_clock))))
1598 return 0;
1599
1600
1601 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
1602
1603 if (vcpu->pvclock_set_guest_stopped_request) {
1604 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
1605 vcpu->pvclock_set_guest_stopped_request = false;
1606 }
1607
1608
1609 if (use_master_clock)
1610 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
1611
1612 vcpu->hv_clock.flags = pvclock_flags;
1613
1614 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1615 &vcpu->hv_clock,
1616 sizeof(vcpu->hv_clock));
1617 return 0;
1618}
1619
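/*
 * Queue a clock update for every vcpu of this VM and kick them, so their
 * kvmclock areas are refreshed together rather than drifting apart.
 */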
1632static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1633{
1634 int i;
1635 struct kvm *kvm = v->kvm;
1636 struct kvm_vcpu *vcpu;
1637
1638 kvm_for_each_vcpu(i, vcpu, kvm) {
1639 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
1640 kvm_vcpu_kick(vcpu);
1641 }
1642}
1643
1644static bool msr_mtrr_valid(unsigned msr)
1645{
1646 switch (msr) {
1647 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
1648 case MSR_MTRRfix64K_00000:
1649 case MSR_MTRRfix16K_80000:
1650 case MSR_MTRRfix16K_A0000:
1651 case MSR_MTRRfix4K_C0000:
1652 case MSR_MTRRfix4K_C8000:
1653 case MSR_MTRRfix4K_D0000:
1654 case MSR_MTRRfix4K_D8000:
1655 case MSR_MTRRfix4K_E0000:
1656 case MSR_MTRRfix4K_E8000:
1657 case MSR_MTRRfix4K_F0000:
1658 case MSR_MTRRfix4K_F8000:
1659 case MSR_MTRRdefType:
1660 case MSR_IA32_CR_PAT:
1661 return true;
1662 case 0x2f8:
1663 return true;
1664 }
1665 return false;
1666}
1667
1668static bool valid_pat_type(unsigned t)
1669{
1670 return t < 8 && (1 << t) & 0xf3;
1671}
1672
1673static bool valid_mtrr_type(unsigned t)
1674{
1675 return t < 8 && (1 << t) & 0x73;
1676}
1677
1678static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1679{
1680 int i;
1681
1682 if (!msr_mtrr_valid(msr))
1683 return false;
1684
1685 if (msr == MSR_IA32_CR_PAT) {
1686 for (i = 0; i < 8; i++)
1687 if (!valid_pat_type((data >> (i * 8)) & 0xff))
1688 return false;
1689 return true;
1690 } else if (msr == MSR_MTRRdefType) {
1691 if (data & ~0xcff)
1692 return false;
1693 return valid_mtrr_type(data & 0xff);
1694 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
1695 for (i = 0; i < 8 ; i++)
1696 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
1697 return false;
1698 return true;
1699 }
1700
1701
1702 return valid_mtrr_type(data & 0xff);
1703}
1704
1705static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1706{
1707 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1708
1709 if (!mtrr_valid(vcpu, msr, data))
1710 return 1;
1711
1712 if (msr == MSR_MTRRdefType) {
1713 vcpu->arch.mtrr_state.def_type = data;
1714 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
1715 } else if (msr == MSR_MTRRfix64K_00000)
1716 p[0] = data;
1717 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1718 p[1 + msr - MSR_MTRRfix16K_80000] = data;
1719 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1720 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
1721 else if (msr == MSR_IA32_CR_PAT)
1722 vcpu->arch.pat = data;
1723 else {
1724 int idx, is_mtrr_mask;
1725 u64 *pt;
1726
1727 idx = (msr - 0x200) / 2;
1728 is_mtrr_mask = msr - 0x200 - 2 * idx;
1729 if (!is_mtrr_mask)
1730 pt =
1731 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1732 else
1733 pt =
1734 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1735 *pt = data;
1736 }
1737
1738 kvm_mmu_reset_context(vcpu);
1739 return 0;
1740}
1741
1742static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1743{
1744 u64 mcg_cap = vcpu->arch.mcg_cap;
1745 unsigned bank_num = mcg_cap & 0xff;
1746
1747 switch (msr) {
1748 case MSR_IA32_MCG_STATUS:
1749 vcpu->arch.mcg_status = data;
1750 break;
1751 case MSR_IA32_MCG_CTL:
1752 if (!(mcg_cap & MCG_CTL_P))
1753 return 1;
1754 if (data != 0 && data != ~(u64)0)
1755 return -1;
1756 vcpu->arch.mcg_ctl = data;
1757 break;
1758 default:
1759 if (msr >= MSR_IA32_MC0_CTL &&
1760 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1761 u32 offset = msr - MSR_IA32_MC0_CTL;
1762
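			/*
			 * Only 0 or all-1s is a valid write to MCi_CTL.
			 * Bit 10 is additionally tolerated because some
			 * guests clear it in bank 4 to work around an old
			 * AMD K8 erratum; rejecting that would inject an
			 * unexpected #GP.
			 */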
1767 if ((offset & 0x3) == 0 &&
1768 data != 0 && (data | (1 << 10)) != ~(u64)0)
1769 return -1;
1770 vcpu->arch.mce_banks[offset] = data;
1771 break;
1772 }
1773 return 1;
1774 }
1775 return 0;
1776}
1777
1778static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
1779{
1780 struct kvm *kvm = vcpu->kvm;
1781 int lm = is_long_mode(vcpu);
1782 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
1783 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
1784 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
1785 : kvm->arch.xen_hvm_config.blob_size_32;
1786 u32 page_num = data & ~PAGE_MASK;
1787 u64 page_addr = data & PAGE_MASK;
1788 u8 *page;
1789 int r;
1790
1791 r = -E2BIG;
1792 if (page_num >= blob_size)
1793 goto out;
1794 r = -ENOMEM;
1795 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
1796 if (IS_ERR(page)) {
1797 r = PTR_ERR(page);
1798 goto out;
1799 }
1800 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
1801 goto out_free;
1802 r = 0;
1803out_free:
1804 kfree(page);
1805out:
1806 return r;
1807}
1808
1809static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1810{
1811 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1812}
1813
1814static bool kvm_hv_msr_partition_wide(u32 msr)
1815{
1816 bool r = false;
1817 switch (msr) {
1818 case HV_X64_MSR_GUEST_OS_ID:
1819 case HV_X64_MSR_HYPERCALL:
1820 r = true;
1821 break;
1822 }
1823
1824 return r;
1825}
1826
1827static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1828{
1829 struct kvm *kvm = vcpu->kvm;
1830
1831 switch (msr) {
1832 case HV_X64_MSR_GUEST_OS_ID:
1833 kvm->arch.hv_guest_os_id = data;
1834
1835 if (!kvm->arch.hv_guest_os_id)
1836 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1837 break;
1838 case HV_X64_MSR_HYPERCALL: {
1839 u64 gfn;
1840 unsigned long addr;
1841 u8 instructions[4];
1842
1843
1844 if (!kvm->arch.hv_guest_os_id)
1845 break;
1846 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1847 kvm->arch.hv_hypercall = data;
1848 break;
1849 }
1850 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1851 addr = gfn_to_hva(kvm, gfn);
1852 if (kvm_is_error_hva(addr))
1853 return 1;
1854 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1855 ((unsigned char *)instructions)[3] = 0xc3;
1856 if (__copy_to_user((void __user *)addr, instructions, 4))
1857 return 1;
1858 kvm->arch.hv_hypercall = data;
1859 break;
1860 }
1861 default:
1862 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1863 "data 0x%llx\n", msr, data);
1864 return 1;
1865 }
1866 return 0;
1867}
1868
1869static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1870{
1871 switch (msr) {
1872 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1873 unsigned long addr;
1874
1875 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1876 vcpu->arch.hv_vapic = data;
1877 break;
1878 }
1879 addr = gfn_to_hva(vcpu->kvm, data >>
1880 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1881 if (kvm_is_error_hva(addr))
1882 return 1;
1883 if (__clear_user((void __user *)addr, PAGE_SIZE))
1884 return 1;
1885 vcpu->arch.hv_vapic = data;
1886 break;
1887 }
1888 case HV_X64_MSR_EOI:
1889 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1890 case HV_X64_MSR_ICR:
1891 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1892 case HV_X64_MSR_TPR:
1893 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1894 default:
1895 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1896 "data 0x%llx\n", msr, data);
1897 return 1;
1898 }
1899
1900 return 0;
1901}
1902
1903static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1904{
1905 gpa_t gpa = data & ~0x3f;
1906
1907
1908 if (data & 0x3c)
1909 return 1;
1910
1911 vcpu->arch.apf.msr_val = data;
1912
1913 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1914 kvm_clear_async_pf_completion_queue(vcpu);
1915 kvm_async_pf_hash_reset(vcpu);
1916 return 0;
1917 }
1918
1919 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
1920 sizeof(u32)))
1921 return 1;
1922
1923 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
1924 kvm_async_pf_wakeup_all(vcpu);
1925 return 0;
1926}
1927
1928static void kvmclock_reset(struct kvm_vcpu *vcpu)
1929{
1930 vcpu->arch.pv_time_enabled = false;
1931}
1932
1933static void accumulate_steal_time(struct kvm_vcpu *vcpu)
1934{
1935 u64 delta;
1936
1937 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1938 return;
1939
1940 delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
1941 vcpu->arch.st.last_steal = current->sched_info.run_delay;
1942 vcpu->arch.st.accum_steal = delta;
1943}
1944
1945static void record_steal_time(struct kvm_vcpu *vcpu)
1946{
1947 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1948 return;
1949
1950 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1951 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
1952 return;
1953
1954 vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
1955 vcpu->arch.st.steal.version += 2;
1956 vcpu->arch.st.accum_steal = 0;
1957
1958 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1959 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
1960}
1961
1962int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1963{
1964 bool pr = false;
1965 u32 msr = msr_info->index;
1966 u64 data = msr_info->data;
1967
1968 switch (msr) {
1969 case MSR_AMD64_NB_CFG:
1970 case MSR_IA32_UCODE_REV:
1971 case MSR_IA32_UCODE_WRITE:
1972 case MSR_VM_HSAVE_PA:
1973 case MSR_AMD64_PATCH_LOADER:
1974 case MSR_AMD64_BU_CFG2:
1975 break;
1976
1977 case MSR_EFER:
1978 return set_efer(vcpu, data);
1979 case MSR_K7_HWCR:
1980 data &= ~(u64)0x40;
1981 data &= ~(u64)0x100;
1982 data &= ~(u64)0x8;
1983 if (data != 0) {
1984 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1985 data);
1986 return 1;
1987 }
1988 break;
1989 case MSR_FAM10H_MMIO_CONF_BASE:
1990 if (data != 0) {
1991 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
1992 "0x%llx\n", data);
1993 return 1;
1994 }
1995 break;
1996 case MSR_IA32_DEBUGCTLMSR:
1997 if (!data) {
1998
1999 break;
2000 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2001
2002
2003 return 1;
2004 }
2005 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2006 __func__, data);
2007 break;
2008 case 0x200 ... 0x2ff:
2009 return set_msr_mtrr(vcpu, msr, data);
2010 case MSR_IA32_APICBASE:
2011 kvm_set_apic_base(vcpu, data);
2012 break;
2013 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2014 return kvm_x2apic_msr_write(vcpu, msr, data);
2015 case MSR_IA32_TSCDEADLINE:
2016 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2017 break;
2018 case MSR_IA32_TSC_ADJUST:
2019 if (guest_cpuid_has_tsc_adjust(vcpu)) {
2020 if (!msr_info->host_initiated) {
2021 u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2022 kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
2023 }
2024 vcpu->arch.ia32_tsc_adjust_msr = data;
2025 }
2026 break;
2027 case MSR_IA32_MISC_ENABLE:
2028 vcpu->arch.ia32_misc_enable_msr = data;
2029 break;
2030 case MSR_KVM_WALL_CLOCK_NEW:
2031 case MSR_KVM_WALL_CLOCK:
2032 vcpu->kvm->arch.wall_clock = data;
2033 kvm_write_wall_clock(vcpu->kvm, data);
2034 break;
2035 case MSR_KVM_SYSTEM_TIME_NEW:
2036 case MSR_KVM_SYSTEM_TIME: {
2037 u64 gpa_offset;
2038 kvmclock_reset(vcpu);
2039
2040 vcpu->arch.time = data;
2041 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2042
2043
2044 if (!(data & 1))
2045 break;
2046
2047 gpa_offset = data & ~(PAGE_MASK | 1);
2048
2049 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2050 &vcpu->arch.pv_time, data & ~1ULL,
2051 sizeof(struct pvclock_vcpu_time_info)))
2052 vcpu->arch.pv_time_enabled = false;
2053 else
2054 vcpu->arch.pv_time_enabled = true;
2055
2056 break;
2057 }
2058 case MSR_KVM_ASYNC_PF_EN:
2059 if (kvm_pv_enable_async_pf(vcpu, data))
2060 return 1;
2061 break;
2062 case MSR_KVM_STEAL_TIME:
2063
2064 if (unlikely(!sched_info_on()))
2065 return 1;
2066
2067 if (data & KVM_STEAL_RESERVED_MASK)
2068 return 1;
2069
2070 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2071 data & KVM_STEAL_VALID_BITS,
2072 sizeof(struct kvm_steal_time)))
2073 return 1;
2074
2075 vcpu->arch.st.msr_val = data;
2076
2077 if (!(data & KVM_MSR_ENABLED))
2078 break;
2079
2080 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2081
2082 preempt_disable();
2083 accumulate_steal_time(vcpu);
2084 preempt_enable();
2085
2086 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2087
2088 break;
2089 case MSR_KVM_PV_EOI_EN:
2090 if (kvm_lapic_enable_pv_eoi(vcpu, data))
2091 return 1;
2092 break;
2093
2094 case MSR_IA32_MCG_CTL:
2095 case MSR_IA32_MCG_STATUS:
2096 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2097 return set_msr_mce(vcpu, msr, data);
2098
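	/*
	 * Old AMD K7 performance MSRs are not virtualized here: all writes
	 * are ignored, and non-trivial ones are logged via vcpu_unimpl().
	 */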
2106 case MSR_K7_EVNTSEL0:
2107 case MSR_K7_EVNTSEL1:
2108 case MSR_K7_EVNTSEL2:
2109 case MSR_K7_EVNTSEL3:
2110 if (data != 0)
2111 vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
2112 "0x%x data 0x%llx\n", msr, data);
2113 break;
2114
2115
2116
2117 case MSR_K7_PERFCTR0:
2118 case MSR_K7_PERFCTR1:
2119 case MSR_K7_PERFCTR2:
2120 case MSR_K7_PERFCTR3:
2121 vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
2122 "0x%x data 0x%llx\n", msr, data);
2123 break;
2124 case MSR_P6_PERFCTR0:
2125 case MSR_P6_PERFCTR1:
2126 pr = true;
2127 case MSR_P6_EVNTSEL0:
2128 case MSR_P6_EVNTSEL1:
2129 if (kvm_pmu_msr(vcpu, msr))
2130 return kvm_pmu_set_msr(vcpu, msr_info);
2131
2132 if (pr || data != 0)
2133 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2134 "0x%x data 0x%llx\n", msr, data);
2135 break;
2136 case MSR_K7_CLK_CTL:
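		/*
		 * Ignore writes to this long-undocumented K7 MSR; some old
		 * guests apparently write it as an AMD-recommended
		 * workaround, and there is nothing useful to emulate.
		 */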
2145 break;
2146 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2147 if (kvm_hv_msr_partition_wide(msr)) {
2148 int r;
2149 mutex_lock(&vcpu->kvm->lock);
2150 r = set_msr_hyperv_pw(vcpu, msr, data);
2151 mutex_unlock(&vcpu->kvm->lock);
2152 return r;
2153 } else
2154 return set_msr_hyperv(vcpu, msr, data);
2155 break;
2156 case MSR_IA32_BBL_CR_CTL3:
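		/* Legacy Intel MSR: log the write and drop it. */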
2160 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
2161 break;
2162 case MSR_AMD64_OSVW_ID_LENGTH:
2163 if (!guest_cpuid_has_osvw(vcpu))
2164 return 1;
2165 vcpu->arch.osvw.length = data;
2166 break;
2167 case MSR_AMD64_OSVW_STATUS:
2168 if (!guest_cpuid_has_osvw(vcpu))
2169 return 1;
2170 vcpu->arch.osvw.status = data;
2171 break;
2172 default:
2173 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2174 return xen_hvm_config(vcpu, data);
2175 if (kvm_pmu_msr(vcpu, msr))
2176 return kvm_pmu_set_msr(vcpu, msr_info);
2177 if (!ignore_msrs) {
2178 vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
2179 msr, data);
2180 return 1;
2181 } else {
2182 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
2183 msr, data);
2184 break;
2185 }
2186 }
2187 return 0;
2188}
2189EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2190
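/*
 * Read the MSR specified by @msr_index into @pdata.
 * Returns 0 on success, non-zero otherwise.  Assumes vcpu_load() was
 * already called.
 */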
2197int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2198{
2199 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
2200}
2201
2202static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2203{
2204 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
2205
2206 if (!msr_mtrr_valid(msr))
2207 return 1;
2208
2209 if (msr == MSR_MTRRdefType)
2210 *pdata = vcpu->arch.mtrr_state.def_type +
2211 (vcpu->arch.mtrr_state.enabled << 10);
2212 else if (msr == MSR_MTRRfix64K_00000)
2213 *pdata = p[0];
2214 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
2215 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
2216 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
2217 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
2218 else if (msr == MSR_IA32_CR_PAT)
2219 *pdata = vcpu->arch.pat;
2220 else {
2221 int idx, is_mtrr_mask;
2222 u64 *pt;
2223
2224 idx = (msr - 0x200) / 2;
2225 is_mtrr_mask = msr - 0x200 - 2 * idx;
2226 if (!is_mtrr_mask)
2227 pt =
2228 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
2229 else
2230 pt =
2231 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
2232 *pdata = *pt;
2233 }
2234
2235 return 0;
2236}
2237
2238static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2239{
2240 u64 data;
2241 u64 mcg_cap = vcpu->arch.mcg_cap;
2242 unsigned bank_num = mcg_cap & 0xff;
2243
2244 switch (msr) {
2245 case MSR_IA32_P5_MC_ADDR:
2246 case MSR_IA32_P5_MC_TYPE:
2247 data = 0;
2248 break;
2249 case MSR_IA32_MCG_CAP:
2250 data = vcpu->arch.mcg_cap;
2251 break;
2252 case MSR_IA32_MCG_CTL:
2253 if (!(mcg_cap & MCG_CTL_P))
2254 return 1;
2255 data = vcpu->arch.mcg_ctl;
2256 break;
2257 case MSR_IA32_MCG_STATUS:
2258 data = vcpu->arch.mcg_status;
2259 break;
2260 default:
2261 if (msr >= MSR_IA32_MC0_CTL &&
2262 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
2263 u32 offset = msr - MSR_IA32_MC0_CTL;
2264 data = vcpu->arch.mce_banks[offset];
2265 break;
2266 }
2267 return 1;
2268 }
2269 *pdata = data;
2270 return 0;
2271}
2272
2273static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2274{
2275 u64 data = 0;
2276 struct kvm *kvm = vcpu->kvm;
2277
2278 switch (msr) {
2279 case HV_X64_MSR_GUEST_OS_ID:
2280 data = kvm->arch.hv_guest_os_id;
2281 break;
2282 case HV_X64_MSR_HYPERCALL:
2283 data = kvm->arch.hv_hypercall;
2284 break;
2285 default:
2286 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2287 return 1;
2288 }
2289
2290 *pdata = data;
2291 return 0;
2292}
2293
2294static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2295{
2296 u64 data = 0;
2297
2298 switch (msr) {
2299 case HV_X64_MSR_VP_INDEX: {
2300 int r;
2301 struct kvm_vcpu *v;
2302 kvm_for_each_vcpu(r, v, vcpu->kvm)
2303 if (v == vcpu)
2304 data = r;
2305 break;
2306 }
2307 case HV_X64_MSR_EOI:
2308 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
2309 case HV_X64_MSR_ICR:
2310 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
2311 case HV_X64_MSR_TPR:
2312 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
2313 case HV_X64_MSR_APIC_ASSIST_PAGE:
2314 data = vcpu->arch.hv_vapic;
2315 break;
2316 default:
2317 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2318 return 1;
2319 }
2320 *pdata = data;
2321 return 0;
2322}
2323
2324int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2325{
2326 u64 data;
2327
2328 switch (msr) {
2329 case MSR_IA32_PLATFORM_ID:
2330 case MSR_IA32_EBL_CR_POWERON:
2331 case MSR_IA32_DEBUGCTLMSR:
2332 case MSR_IA32_LASTBRANCHFROMIP:
2333 case MSR_IA32_LASTBRANCHTOIP:
2334 case MSR_IA32_LASTINTFROMIP:
2335 case MSR_IA32_LASTINTTOIP:
2336 case MSR_K8_SYSCFG:
2337 case MSR_K7_HWCR:
2338 case MSR_VM_HSAVE_PA:
2339 case MSR_K7_EVNTSEL0:
2340 case MSR_K7_PERFCTR0:
2341 case MSR_K8_INT_PENDING_MSG:
2342 case MSR_AMD64_NB_CFG:
2343 case MSR_FAM10H_MMIO_CONF_BASE:
2344 case MSR_AMD64_BU_CFG2:
2345 data = 0;
2346 break;
2347 case MSR_P6_PERFCTR0:
2348 case MSR_P6_PERFCTR1:
2349 case MSR_P6_EVNTSEL0:
2350 case MSR_P6_EVNTSEL1:
2351 if (kvm_pmu_msr(vcpu, msr))
2352 return kvm_pmu_get_msr(vcpu, msr, pdata);
2353 data = 0;
2354 break;
2355 case MSR_IA32_UCODE_REV:
2356 data = 0x100000000ULL;
2357 break;
2358 case MSR_MTRRcap:
2359 data = 0x500 | KVM_NR_VAR_MTRR;
2360 break;
2361 case 0x200 ... 0x2ff:
2362 return get_msr_mtrr(vcpu, msr, pdata);
2363 case 0xcd: /* fsb frequency */
2364 data = 3;
2365 break;
2366
2367 /*
2368 * MSR_EBC_FREQUENCY_ID: report a fixed, conservative front-side-bus
2369 * frequency/ratio encoding instead of the host value.  The constant
2370 * (1 << 24) is believed to be acceptable to even the most basic CPU
2371 * models; guests typically only consult this MSR for frequency
2372 * reporting, so an approximate value is sufficient.
2373 */
2374
2375
2376
2377 case MSR_EBC_FREQUENCY_ID:
2378 data = 1 << 24;
2379 break;
2380 case MSR_IA32_APICBASE:
2381 data = kvm_get_apic_base(vcpu);
2382 break;
2383 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2384 return kvm_x2apic_msr_read(vcpu, msr, pdata);
2385 break;
2386 case MSR_IA32_TSCDEADLINE:
2387 data = kvm_get_lapic_tscdeadline_msr(vcpu);
2388 break;
2389 case MSR_IA32_TSC_ADJUST:
2390 data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2391 break;
2392 case MSR_IA32_MISC_ENABLE:
2393 data = vcpu->arch.ia32_misc_enable_msr;
2394 break;
2395 case MSR_IA32_PERF_STATUS:
2396 /* TSC increment by tick */
2397 data = 1000ULL;
2398 /* CPU multiplier */
2399 data |= (((uint64_t)4ULL) << 40);
2400 break;
2401 case MSR_EFER:
2402 data = vcpu->arch.efer;
2403 break;
2404 case MSR_KVM_WALL_CLOCK:
2405 case MSR_KVM_WALL_CLOCK_NEW:
2406 data = vcpu->kvm->arch.wall_clock;
2407 break;
2408 case MSR_KVM_SYSTEM_TIME:
2409 case MSR_KVM_SYSTEM_TIME_NEW:
2410 data = vcpu->arch.time;
2411 break;
2412 case MSR_KVM_ASYNC_PF_EN:
2413 data = vcpu->arch.apf.msr_val;
2414 break;
2415 case MSR_KVM_STEAL_TIME:
2416 data = vcpu->arch.st.msr_val;
2417 break;
2418 case MSR_KVM_PV_EOI_EN:
2419 data = vcpu->arch.pv_eoi.msr_val;
2420 break;
2421 case MSR_IA32_P5_MC_ADDR:
2422 case MSR_IA32_P5_MC_TYPE:
2423 case MSR_IA32_MCG_CAP:
2424 case MSR_IA32_MCG_CTL:
2425 case MSR_IA32_MCG_STATUS:
2426 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2427 return get_msr_mce(vcpu, msr, pdata);
2428 case MSR_K7_CLK_CTL:
2429 /*
2430 * Provide the expected ramp-up count for K7.  All other bits are
2431 * zero, indicating minimum divisors for every field.
2432 *
2433 * This reportedly prevents guest kernels on AMD hosts (family 6,
2434 * model 8 and higher) from locking up when they read this MSR.
2435 */
2436
2437
2438 data = 0x20000000;
2439 break;
2440 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2441 if (kvm_hv_msr_partition_wide(msr)) {
2442 int r;
2443 mutex_lock(&vcpu->kvm->lock);
2444 r = get_msr_hyperv_pw(vcpu, msr, pdata);
2445 mutex_unlock(&vcpu->kvm->lock);
2446 return r;
2447 } else
2448 return get_msr_hyperv(vcpu, msr, pdata);
2449 break;
2450 case MSR_IA32_BBL_CR_CTL3:
2451 /*
2452 * This legacy MSR exists but is not fully documented on current
2453 * silicon.  Some guests (reportedly Windows XP) read it in narrow
2454 * scenarios, so the read must not fail; the constant below is a
2455 * best-effort attempt to return coherent-looking data rather than
2456 * zero.
2457 */
2458
2459
2460
2461 data = 0xbe702111;
2462 break;
2463 case MSR_AMD64_OSVW_ID_LENGTH:
2464 if (!guest_cpuid_has_osvw(vcpu))
2465 return 1;
2466 data = vcpu->arch.osvw.length;
2467 break;
2468 case MSR_AMD64_OSVW_STATUS:
2469 if (!guest_cpuid_has_osvw(vcpu))
2470 return 1;
2471 data = vcpu->arch.osvw.status;
2472 break;
2473 default:
2474 if (kvm_pmu_msr(vcpu, msr))
2475 return kvm_pmu_get_msr(vcpu, msr, pdata);
2476 if (!ignore_msrs) {
2477 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
2478 return 1;
2479 } else {
2480 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
2481 data = 0;
2482 }
2483 break;
2484 }
2485 *pdata = data;
2486 return 0;
2487}
2488EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2489
2490/*
2491 * Read or write a bunch of msrs. All parameters are kernel addresses.
2492 *
2493 * @return number of msrs set successfully.
2494 */
2495static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2496 struct kvm_msr_entry *entries,
2497 int (*do_msr)(struct kvm_vcpu *vcpu,
2498 unsigned index, u64 *data))
2499{
2500 int i, idx;
2501
2502 idx = srcu_read_lock(&vcpu->kvm->srcu);
2503 for (i = 0; i < msrs->nmsrs; ++i)
2504 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2505 break;
2506 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2507
2508 return i;
2509}
2510
2511/*
2512 * Read or write a bunch of msrs. Parameters are user addresses.
2513 *
2514 * @return number of msrs set successfully.
2515 */
2516static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2517 int (*do_msr)(struct kvm_vcpu *vcpu,
2518 unsigned index, u64 *data),
2519 int writeback)
2520{
2521 struct kvm_msrs msrs;
2522 struct kvm_msr_entry *entries;
2523 int r, n;
2524 unsigned size;
2525
2526 r = -EFAULT;
2527 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2528 goto out;
2529
2530 r = -E2BIG;
2531 if (msrs.nmsrs >= MAX_IO_MSRS)
2532 goto out;
2533
2534 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2535 entries = memdup_user(user_msrs->entries, size);
2536 if (IS_ERR(entries)) {
2537 r = PTR_ERR(entries);
2538 goto out;
2539 }
2540
2541 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2542 if (r < 0)
2543 goto out_free;
2544
2545 r = -EFAULT;
2546 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2547 goto out_free;
2548
2549 r = n;
2550
2551out_free:
2552 kfree(entries);
2553out:
2554 return r;
2555}
2556
2557int kvm_dev_ioctl_check_extension(long ext)
2558{
2559 int r;
2560
2561 switch (ext) {
2562 case KVM_CAP_IRQCHIP:
2563 case KVM_CAP_HLT:
2564 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2565 case KVM_CAP_SET_TSS_ADDR:
2566 case KVM_CAP_EXT_CPUID:
2567 case KVM_CAP_CLOCKSOURCE:
2568 case KVM_CAP_PIT:
2569 case KVM_CAP_NOP_IO_DELAY:
2570 case KVM_CAP_MP_STATE:
2571 case KVM_CAP_SYNC_MMU:
2572 case KVM_CAP_USER_NMI:
2573 case KVM_CAP_REINJECT_CONTROL:
2574 case KVM_CAP_IRQ_INJECT_STATUS:
2575 case KVM_CAP_IRQFD:
2576 case KVM_CAP_IOEVENTFD:
2577 case KVM_CAP_PIT2:
2578 case KVM_CAP_PIT_STATE2:
2579 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2580 case KVM_CAP_XEN_HVM:
2581 case KVM_CAP_ADJUST_CLOCK:
2582 case KVM_CAP_VCPU_EVENTS:
2583 case KVM_CAP_HYPERV:
2584 case KVM_CAP_HYPERV_VAPIC:
2585 case KVM_CAP_HYPERV_SPIN:
2586 case KVM_CAP_PCI_SEGMENT:
2587 case KVM_CAP_DEBUGREGS:
2588 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2589 case KVM_CAP_XSAVE:
2590 case KVM_CAP_ASYNC_PF:
2591 case KVM_CAP_GET_TSC_KHZ:
2592 case KVM_CAP_KVMCLOCK_CTRL:
2593 case KVM_CAP_READONLY_MEM:
2594#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2595 case KVM_CAP_ASSIGN_DEV_IRQ:
2596 case KVM_CAP_PCI_2_3:
2597#endif
2598 r = 1;
2599 break;
2600 case KVM_CAP_COALESCED_MMIO:
2601 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2602 break;
2603 case KVM_CAP_VAPIC:
2604 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2605 break;
2606 case KVM_CAP_NR_VCPUS:
2607 r = KVM_SOFT_MAX_VCPUS;
2608 break;
2609 case KVM_CAP_MAX_VCPUS:
2610 r = KVM_MAX_VCPUS;
2611 break;
2612 case KVM_CAP_NR_MEMSLOTS:
2613 r = KVM_USER_MEM_SLOTS;
2614 break;
2615 case KVM_CAP_PV_MMU:
2616 r = 0;
2617 break;
2618#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2619 case KVM_CAP_IOMMU:
2620 r = iommu_present(&pci_bus_type);
2621 break;
2622#endif
2623 case KVM_CAP_MCE:
2624 r = KVM_MAX_MCE_BANKS;
2625 break;
2626 case KVM_CAP_XCRS:
2627 r = cpu_has_xsave;
2628 break;
2629 case KVM_CAP_TSC_CONTROL:
2630 r = kvm_has_tsc_control;
2631 break;
2632 case KVM_CAP_TSC_DEADLINE_TIMER:
2633 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2634 break;
2635 default:
2636 r = 0;
2637 break;
2638 }
2639 return r;
2640
2641}
2642
2643long kvm_arch_dev_ioctl(struct file *filp,
2644 unsigned int ioctl, unsigned long arg)
2645{
2646 void __user *argp = (void __user *)arg;
2647 long r;
2648
2649 switch (ioctl) {
2650 case KVM_GET_MSR_INDEX_LIST: {
2651 struct kvm_msr_list __user *user_msr_list = argp;
2652 struct kvm_msr_list msr_list;
2653 unsigned n;
2654
2655 r = -EFAULT;
2656 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2657 goto out;
2658 n = msr_list.nmsrs;
2659 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2660 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2661 goto out;
2662 r = -E2BIG;
2663 if (n < msr_list.nmsrs)
2664 goto out;
2665 r = -EFAULT;
2666 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2667 num_msrs_to_save * sizeof(u32)))
2668 goto out;
2669 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2670 &emulated_msrs,
2671 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2672 goto out;
2673 r = 0;
2674 break;
2675 }
2676 case KVM_GET_SUPPORTED_CPUID: {
2677 struct kvm_cpuid2 __user *cpuid_arg = argp;
2678 struct kvm_cpuid2 cpuid;
2679
2680 r = -EFAULT;
2681 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2682 goto out;
2683 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
2684 cpuid_arg->entries);
2685 if (r)
2686 goto out;
2687
2688 r = -EFAULT;
2689 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2690 goto out;
2691 r = 0;
2692 break;
2693 }
2694 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2695 u64 mce_cap;
2696
2697 mce_cap = KVM_MCE_CAP_SUPPORTED;
2698 r = -EFAULT;
2699 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2700 goto out;
2701 r = 0;
2702 break;
2703 }
2704 default:
2705 r = -EINVAL;
2706 }
2707out:
2708 return r;
2709}
2710
2711static void wbinvd_ipi(void *garbage)
2712{
2713 wbinvd();
2714}
2715
2716static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2717{
2718 return vcpu->kvm->arch.iommu_domain &&
2719 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2720}
2721
2722void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2723{
2724 /* Address WBINVD may be executed by guest */
2725 if (need_emulate_wbinvd(vcpu)) {
2726 if (kvm_x86_ops->has_wbinvd_exit())
2727 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2728 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2729 smp_call_function_single(vcpu->cpu,
2730 wbinvd_ipi, NULL, 1);
2731 }
2732
2733 kvm_x86_ops->vcpu_load(vcpu, cpu);
2734
2735 /* Apply any externally detected TSC adjustments (due to suspend) */
2736 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2737 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2738 vcpu->arch.tsc_offset_adjustment = 0;
2739 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2740 }
2741
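 /*
  * Note: if the vcpu has moved to a new physical CPU or the host TSC is
  * unstable, check for a backwards host TSC and, on unstable hosts,
  * recompute the TSC offset from the last observed guest TSC and switch
  * to catchup mode so the guest never sees its TSC go backwards.  Timers
  * are migrated along with the vcpu.
  */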
2742 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2743 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2744 native_read_tsc() - vcpu->arch.last_host_tsc;
2745 if (tsc_delta < 0)
2746 mark_tsc_unstable("KVM discovered backwards TSC");
2747 if (check_tsc_unstable()) {
2748 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2749 vcpu->arch.last_guest_tsc);
2750 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2751 vcpu->arch.tsc_catchup = 1;
2752 }
2753 /*
2754 * On a host with synchronized TSC, there is no need to update
2755 * kvmclock on vcpu->cpu migration
2756 */
2757 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2758 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2759 if (vcpu->cpu != cpu)
2760 kvm_migrate_timers(vcpu);
2761 vcpu->cpu = cpu;
2762 }
2763
2764 accumulate_steal_time(vcpu);
2765 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2766}
2767
2768void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2769{
2770 kvm_x86_ops->vcpu_put(vcpu);
2771 kvm_put_guest_fpu(vcpu);
2772 vcpu->arch.last_host_tsc = native_read_tsc();
2773}
2774
2775static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2776 struct kvm_lapic_state *s)
2777{
2778 kvm_x86_ops->sync_pir_to_irr(vcpu);
2779 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2780
2781 return 0;
2782}
2783
2784static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2785 struct kvm_lapic_state *s)
2786{
2787 kvm_apic_post_state_restore(vcpu, s);
2788 update_cr8_intercept(vcpu);
2789
2790 return 0;
2791}
2792
2793static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2794 struct kvm_interrupt *irq)
2795{
2796 if (irq->irq >= KVM_NR_INTERRUPTS)
2797 return -EINVAL;
2798 if (irqchip_in_kernel(vcpu->kvm))
2799 return -ENXIO;
2800
2801 kvm_queue_interrupt(vcpu, irq->irq, false);
2802 kvm_make_request(KVM_REQ_EVENT, vcpu);
2803
2804 return 0;
2805}
2806
2807static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2808{
2809 kvm_inject_nmi(vcpu);
2810
2811 return 0;
2812}
2813
2814static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2815 struct kvm_tpr_access_ctl *tac)
2816{
2817 if (tac->flags)
2818 return -EINVAL;
2819 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2820 return 0;
2821}
2822
2823static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2824 u64 mcg_cap)
2825{
2826 int r;
2827 unsigned bank_num = mcg_cap & 0xff, bank;
2828
2829 r = -EINVAL;
2830 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2831 goto out;
2832 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2833 goto out;
2834 r = 0;
2835 vcpu->arch.mcg_cap = mcg_cap;
2836 /* Init IA32_MCG_CTL to all 1s */
2837 if (mcg_cap & MCG_CTL_P)
2838 vcpu->arch.mcg_ctl = ~(u64)0;
2839 /* Init IA32_MCi_CTL to all 1s */
2840 for (bank = 0; bank < bank_num; bank++)
2841 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2842out:
2843 return r;
2844}
2845
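/*
 * Note: this services KVM_X86_SET_MCE, which lets userspace inject a
 * machine check event.  The event is latched in the emulated MCE bank
 * registers; an uncorrected error additionally raises #MC in the guest,
 * or a triple fault if the guest cannot take machine checks yet (MCIP
 * already set or CR4.MCE clear).
 */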
2846static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2847 struct kvm_x86_mce *mce)
2848{
2849 u64 mcg_cap = vcpu->arch.mcg_cap;
2850 unsigned bank_num = mcg_cap & 0xff;
2851 u64 *banks = vcpu->arch.mce_banks;
2852
2853 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2854 return -EINVAL;
2855
2856 /*
2857 * if IA32_MCG_CTL is not all 1s, the uncorrected error reporting is disabled
2858 */
2859 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2860 vcpu->arch.mcg_ctl != ~(u64)0)
2861 return 0;
2862 banks += 4 * mce->bank;
2863
2864 /*
2865 * if IA32_MCi_CTL is not all 1s, the uncorrected error reporting is disabled for the bank
2866 */
2867 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2868 return 0;
2869 if (mce->status & MCI_STATUS_UC) {
2870 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2871 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2872 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2873 return 0;
2874 }
2875 if (banks[1] & MCI_STATUS_VAL)
2876 mce->status |= MCI_STATUS_OVER;
2877 banks[2] = mce->addr;
2878 banks[3] = mce->misc;
2879 vcpu->arch.mcg_status = mce->mcg_status;
2880 banks[1] = mce->status;
2881 kvm_queue_exception(vcpu, MC_VECTOR);
2882 } else if (!(banks[1] & MCI_STATUS_VAL)
2883 || !(banks[1] & MCI_STATUS_UC)) {
2884 if (banks[1] & MCI_STATUS_VAL)
2885 mce->status |= MCI_STATUS_OVER;
2886 banks[2] = mce->addr;
2887 banks[3] = mce->misc;
2888 banks[1] = mce->status;
2889 } else
2890 banks[1] |= MCI_STATUS_OVER;
2891 return 0;
2892}
2893
2894static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2895 struct kvm_vcpu_events *events)
2896{
2897 process_nmi(vcpu);
2898 events->exception.injected =
2899 vcpu->arch.exception.pending &&
2900 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2901 events->exception.nr = vcpu->arch.exception.nr;
2902 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2903 events->exception.pad = 0;
2904 events->exception.error_code = vcpu->arch.exception.error_code;
2905
2906 events->interrupt.injected =
2907 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2908 events->interrupt.nr = vcpu->arch.interrupt.nr;
2909 events->interrupt.soft = 0;
2910 events->interrupt.shadow =
2911 kvm_x86_ops->get_interrupt_shadow(vcpu,
2912 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2913
2914 events->nmi.injected = vcpu->arch.nmi_injected;
2915 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2916 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2917 events->nmi.pad = 0;
2918
2919 events->sipi_vector = 0; /* never valid when reporting to user space */
2920
2921 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2922 | KVM_VCPUEVENT_VALID_SHADOW);
2923 memset(&events->reserved, 0, sizeof(events->reserved));
2924}
2925
2926static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2927 struct kvm_vcpu_events *events)
2928{
2929 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2930 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2931 | KVM_VCPUEVENT_VALID_SHADOW))
2932 return -EINVAL;
2933
2934 process_nmi(vcpu);
2935 vcpu->arch.exception.pending = events->exception.injected;
2936 vcpu->arch.exception.nr = events->exception.nr;
2937 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2938 vcpu->arch.exception.error_code = events->exception.error_code;
2939
2940 vcpu->arch.interrupt.pending = events->interrupt.injected;
2941 vcpu->arch.interrupt.nr = events->interrupt.nr;
2942 vcpu->arch.interrupt.soft = events->interrupt.soft;
2943 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2944 kvm_x86_ops->set_interrupt_shadow(vcpu,
2945 events->interrupt.shadow);
2946
2947 vcpu->arch.nmi_injected = events->nmi.injected;
2948 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2949 vcpu->arch.nmi_pending = events->nmi.pending;
2950 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2951
2952 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
2953 kvm_vcpu_has_lapic(vcpu))
2954 vcpu->arch.apic->sipi_vector = events->sipi_vector;
2955
2956 kvm_make_request(KVM_REQ_EVENT, vcpu);
2957
2958 return 0;
2959}
2960
2961static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2962 struct kvm_debugregs *dbgregs)
2963{
2964 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2965 dbgregs->dr6 = vcpu->arch.dr6;
2966 dbgregs->dr7 = vcpu->arch.dr7;
2967 dbgregs->flags = 0;
2968 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2969}
2970
2971static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2972 struct kvm_debugregs *dbgregs)
2973{
2974 if (dbgregs->flags)
2975 return -EINVAL;
2976
2977 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2978 vcpu->arch.dr6 = dbgregs->dr6;
2979 vcpu->arch.dr7 = dbgregs->dr7;
2980
2981 return 0;
2982}
2983
2984static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2985 struct kvm_xsave *guest_xsave)
2986{
2987 if (cpu_has_xsave)
2988 memcpy(guest_xsave->region,
2989 &vcpu->arch.guest_fpu.state->xsave,
2990 xstate_size);
2991 else {
2992 memcpy(guest_xsave->region,
2993 &vcpu->arch.guest_fpu.state->fxsave,
2994 sizeof(struct i387_fxsave_struct));
2995 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2996 XSTATE_FPSSE;
2997 }
2998}
2999
3000static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3001 struct kvm_xsave *guest_xsave)
3002{
3003 u64 xstate_bv =
3004 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3005
3006 if (cpu_has_xsave)
3007 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3008 guest_xsave->region, xstate_size);
3009 else {
3010 if (xstate_bv & ~XSTATE_FPSSE)
3011 return -EINVAL;
3012 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
3013 guest_xsave->region, sizeof(struct i387_fxsave_struct));
3014 }
3015 return 0;
3016}
3017
3018static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3019 struct kvm_xcrs *guest_xcrs)
3020{
3021 if (!cpu_has_xsave) {
3022 guest_xcrs->nr_xcrs = 0;
3023 return;
3024 }
3025
3026 guest_xcrs->nr_xcrs = 1;
3027 guest_xcrs->flags = 0;
3028 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3029 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3030}
3031
3032static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3033 struct kvm_xcrs *guest_xcrs)
3034{
3035 int i, r = 0;
3036
3037 if (!cpu_has_xsave)
3038 return -EINVAL;
3039
3040 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3041 return -EINVAL;
3042
3043 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3044 /* Only support XCR0 currently */
3045 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
3046 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3047 guest_xcrs->xcrs[0].value);
3048 break;
3049 }
3050 if (r)
3051 r = -EINVAL;
3052 return r;
3053}
3054
3055/*
3056 * kvm_set_guest_paused() indicates to the guest kernel that it has been
3057 * stopped by the hypervisor.  This function will be called from the host
3058 * only.  -EINVAL is returned when pvclock (MSR_KVM_SYSTEM_TIME) has not
3059 * been enabled for this vcpu.
3060 */
3061static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3062{
3063 if (!vcpu->arch.pv_time_enabled)
3064 return -EINVAL;
3065 vcpu->arch.pvclock_set_guest_stopped_request = true;
3066 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3067 return 0;
3068}
3069
3070long kvm_arch_vcpu_ioctl(struct file *filp,
3071 unsigned int ioctl, unsigned long arg)
3072{
3073 struct kvm_vcpu *vcpu = filp->private_data;
3074 void __user *argp = (void __user *)arg;
3075 int r;
3076 union {
3077 struct kvm_lapic_state *lapic;
3078 struct kvm_xsave *xsave;
3079 struct kvm_xcrs *xcrs;
3080 void *buffer;
3081 } u;
3082
3083 u.buffer = NULL;
3084 switch (ioctl) {
3085 case KVM_GET_LAPIC: {
3086 r = -EINVAL;
3087 if (!vcpu->arch.apic)
3088 goto out;
3089 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3090
3091 r = -ENOMEM;
3092 if (!u.lapic)
3093 goto out;
3094 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3095 if (r)
3096 goto out;
3097 r = -EFAULT;
3098 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3099 goto out;
3100 r = 0;
3101 break;
3102 }
3103 case KVM_SET_LAPIC: {
3104 r = -EINVAL;
3105 if (!vcpu->arch.apic)
3106 goto out;
3107 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3108 if (IS_ERR(u.lapic))
3109 return PTR_ERR(u.lapic);
3110
3111 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3112 break;
3113 }
3114 case KVM_INTERRUPT: {
3115 struct kvm_interrupt irq;
3116
3117 r = -EFAULT;
3118 if (copy_from_user(&irq, argp, sizeof irq))
3119 goto out;
3120 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3121 break;
3122 }
3123 case KVM_NMI: {
3124 r = kvm_vcpu_ioctl_nmi(vcpu);
3125 break;
3126 }
3127 case KVM_SET_CPUID: {
3128 struct kvm_cpuid __user *cpuid_arg = argp;
3129 struct kvm_cpuid cpuid;
3130
3131 r = -EFAULT;
3132 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3133 goto out;
3134 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3135 break;
3136 }
3137 case KVM_SET_CPUID2: {
3138 struct kvm_cpuid2 __user *cpuid_arg = argp;
3139 struct kvm_cpuid2 cpuid;
3140
3141 r = -EFAULT;
3142 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3143 goto out;
3144 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3145 cpuid_arg->entries);
3146 break;
3147 }
3148 case KVM_GET_CPUID2: {
3149 struct kvm_cpuid2 __user *cpuid_arg = argp;
3150 struct kvm_cpuid2 cpuid;
3151
3152 r = -EFAULT;
3153 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3154 goto out;
3155 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3156 cpuid_arg->entries);
3157 if (r)
3158 goto out;
3159 r = -EFAULT;
3160 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3161 goto out;
3162 r = 0;
3163 break;
3164 }
3165 case KVM_GET_MSRS:
3166 r = msr_io(vcpu, argp, kvm_get_msr, 1);
3167 break;
3168 case KVM_SET_MSRS:
3169 r = msr_io(vcpu, argp, do_set_msr, 0);
3170 break;
3171 case KVM_TPR_ACCESS_REPORTING: {
3172 struct kvm_tpr_access_ctl tac;
3173
3174 r = -EFAULT;
3175 if (copy_from_user(&tac, argp, sizeof tac))
3176 goto out;
3177 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3178 if (r)
3179 goto out;
3180 r = -EFAULT;
3181 if (copy_to_user(argp, &tac, sizeof tac))
3182 goto out;
3183 r = 0;
3184 break;
3185 }
3186 case KVM_SET_VAPIC_ADDR: {
3187 struct kvm_vapic_addr va;
3188
3189 r = -EINVAL;
3190 if (!irqchip_in_kernel(vcpu->kvm))
3191 goto out;
3192 r = -EFAULT;
3193 if (copy_from_user(&va, argp, sizeof va))
3194 goto out;
3195 r = 0;
3196 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3197 break;
3198 }
3199 case KVM_X86_SETUP_MCE: {
3200 u64 mcg_cap;
3201
3202 r = -EFAULT;
3203 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3204 goto out;
3205 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3206 break;
3207 }
3208 case KVM_X86_SET_MCE: {
3209 struct kvm_x86_mce mce;
3210
3211 r = -EFAULT;
3212 if (copy_from_user(&mce, argp, sizeof mce))
3213 goto out;
3214 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3215 break;
3216 }
3217 case KVM_GET_VCPU_EVENTS: {
3218 struct kvm_vcpu_events events;
3219
3220 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3221
3222 r = -EFAULT;
3223 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3224 break;
3225 r = 0;
3226 break;
3227 }
3228 case KVM_SET_VCPU_EVENTS: {
3229 struct kvm_vcpu_events events;
3230
3231 r = -EFAULT;
3232 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3233 break;
3234
3235 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3236 break;
3237 }
3238 case KVM_GET_DEBUGREGS: {
3239 struct kvm_debugregs dbgregs;
3240
3241 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3242
3243 r = -EFAULT;
3244 if (copy_to_user(argp, &dbgregs,
3245 sizeof(struct kvm_debugregs)))
3246 break;
3247 r = 0;
3248 break;
3249 }
3250 case KVM_SET_DEBUGREGS: {
3251 struct kvm_debugregs dbgregs;
3252
3253 r = -EFAULT;
3254 if (copy_from_user(&dbgregs, argp,
3255 sizeof(struct kvm_debugregs)))
3256 break;
3257
3258 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3259 break;
3260 }
3261 case KVM_GET_XSAVE: {
3262 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3263 r = -ENOMEM;
3264 if (!u.xsave)
3265 break;
3266
3267 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3268
3269 r = -EFAULT;
3270 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3271 break;
3272 r = 0;
3273 break;
3274 }
3275 case KVM_SET_XSAVE: {
3276 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3277 if (IS_ERR(u.xsave))
3278 return PTR_ERR(u.xsave);
3279
3280 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3281 break;
3282 }
3283 case KVM_GET_XCRS: {
3284 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3285 r = -ENOMEM;
3286 if (!u.xcrs)
3287 break;
3288
3289 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3290
3291 r = -EFAULT;
3292 if (copy_to_user(argp, u.xcrs,
3293 sizeof(struct kvm_xcrs)))
3294 break;
3295 r = 0;
3296 break;
3297 }
3298 case KVM_SET_XCRS: {
3299 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3300 if (IS_ERR(u.xcrs))
3301 return PTR_ERR(u.xcrs);
3302
3303 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3304 break;
3305 }
3306 case KVM_SET_TSC_KHZ: {
3307 u32 user_tsc_khz;
3308
3309 r = -EINVAL;
3310 user_tsc_khz = (u32)arg;
3311
3312 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3313 goto out;
3314
3315 if (user_tsc_khz == 0)
3316 user_tsc_khz = tsc_khz;
3317
3318 kvm_set_tsc_khz(vcpu, user_tsc_khz);
3319
3320 r = 0;
3321 goto out;
3322 }
3323 case KVM_GET_TSC_KHZ: {
3324 r = vcpu->arch.virtual_tsc_khz;
3325 goto out;
3326 }
3327 case KVM_KVMCLOCK_CTRL: {
3328 r = kvm_set_guest_paused(vcpu);
3329 goto out;
3330 }
3331 default:
3332 r = -EINVAL;
3333 }
3334out:
3335 kfree(u.buffer);
3336 return r;
3337}
3338
3339int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3340{
3341 return VM_FAULT_SIGBUS;
3342}
3343
3344static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3345{
3346 int ret;
3347
3348 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3349 return -EINVAL;
3350 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3351 return ret;
3352}
3353
3354static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3355 u64 ident_addr)
3356{
3357 kvm->arch.ept_identity_map_addr = ident_addr;
3358 return 0;
3359}
3360
3361static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3362 u32 kvm_nr_mmu_pages)
3363{
3364 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3365 return -EINVAL;
3366
3367 mutex_lock(&kvm->slots_lock);
3368
3369 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3370 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3371
3372 mutex_unlock(&kvm->slots_lock);
3373 return 0;
3374}
3375
3376static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3377{
3378 return kvm->arch.n_max_mmu_pages;
3379}
3380
3381static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3382{
3383 int r;
3384
3385 r = 0;
3386 switch (chip->chip_id) {
3387 case KVM_IRQCHIP_PIC_MASTER:
3388 memcpy(&chip->chip.pic,
3389 &pic_irqchip(kvm)->pics[0],
3390 sizeof(struct kvm_pic_state));
3391 break;
3392 case KVM_IRQCHIP_PIC_SLAVE:
3393 memcpy(&chip->chip.pic,
3394 &pic_irqchip(kvm)->pics[1],
3395 sizeof(struct kvm_pic_state));
3396 break;
3397 case KVM_IRQCHIP_IOAPIC:
3398 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3399 break;
3400 default:
3401 r = -EINVAL;
3402 break;
3403 }
3404 return r;
3405}
3406
3407static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3408{
3409 int r;
3410
3411 r = 0;
3412 switch (chip->chip_id) {
3413 case KVM_IRQCHIP_PIC_MASTER:
3414 spin_lock(&pic_irqchip(kvm)->lock);
3415 memcpy(&pic_irqchip(kvm)->pics[0],
3416 &chip->chip.pic,
3417 sizeof(struct kvm_pic_state));
3418 spin_unlock(&pic_irqchip(kvm)->lock);
3419 break;
3420 case KVM_IRQCHIP_PIC_SLAVE:
3421 spin_lock(&pic_irqchip(kvm)->lock);
3422 memcpy(&pic_irqchip(kvm)->pics[1],
3423 &chip->chip.pic,
3424 sizeof(struct kvm_pic_state));
3425 spin_unlock(&pic_irqchip(kvm)->lock);
3426 break;
3427 case KVM_IRQCHIP_IOAPIC:
3428 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3429 break;
3430 default:
3431 r = -EINVAL;
3432 break;
3433 }
3434 kvm_pic_update_irq(pic_irqchip(kvm));
3435 return r;
3436}
3437
3438static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3439{
3440 int r = 0;
3441
3442 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3443 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3444 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3445 return r;
3446}
3447
3448static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3449{
3450 int r = 0;
3451
3452 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3453 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3454 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3455 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3456 return r;
3457}
3458
3459static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3460{
3461 int r = 0;
3462
3463 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3464 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3465 sizeof(ps->channels));
3466 ps->flags = kvm->arch.vpit->pit_state.flags;
3467 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3468 memset(&ps->reserved, 0, sizeof(ps->reserved));
3469 return r;
3470}
3471
3472static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3473{
3474 int r = 0, start = 0;
3475 u32 prev_legacy, cur_legacy;
3476 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3477 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3478 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3479 if (!prev_legacy && cur_legacy)
3480 start = 1;
3481 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3482 sizeof(kvm->arch.vpit->pit_state.channels));
3483 kvm->arch.vpit->pit_state.flags = ps->flags;
3484 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3485 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3486 return r;
3487}
3488
3489static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3490 struct kvm_reinject_control *control)
3491{
3492 if (!kvm->arch.vpit)
3493 return -ENXIO;
3494 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3495 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3496 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3497 return 0;
3498}
3499
3500/**
3501 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
3502 * @kvm: kvm instance
3503 * @log: slot id and address to which we copy the log
3504 *
3505 * We need to keep it in mind that VCPU threads can write to the bitmap
3506 * concurrently.  So, to avoid losing data, we keep the following order for
3507 * each bit:
3508 *
3509 *   1. Take a snapshot of the bit and clear it if needed.
3510 *   2. Write protect the corresponding page.
3511 *   3. Flush TLB's if needed.
3512 *   4. Copy the snapshot to the userspace.
3513 *
3514 * Between 2 and 3, the guest may write to the page using the remaining TLB
3515 * entry.  This is not a problem because the page will be reported dirty at
3516 * step 4 using the snapshot taken before and step 3 ensures that successive
3517 * writes will be logged for the next call.
3518 */
3519int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3520{
3521 int r;
3522 struct kvm_memory_slot *memslot;
3523 unsigned long n, i;
3524 unsigned long *dirty_bitmap;
3525 unsigned long *dirty_bitmap_buffer;
3526 bool is_dirty = false;
3527
3528 mutex_lock(&kvm->slots_lock);
3529
3530 r = -EINVAL;
3531 if (log->slot >= KVM_USER_MEM_SLOTS)
3532 goto out;
3533
3534 memslot = id_to_memslot(kvm->memslots, log->slot);
3535
3536 dirty_bitmap = memslot->dirty_bitmap;
3537 r = -ENOENT;
3538 if (!dirty_bitmap)
3539 goto out;
3540
3541 n = kvm_dirty_bitmap_bytes(memslot);
3542
3543 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3544 memset(dirty_bitmap_buffer, 0, n);
3545
3546 spin_lock(&kvm->mmu_lock);
3547
3548 for (i = 0; i < n / sizeof(long); i++) {
3549 unsigned long mask;
3550 gfn_t offset;
3551
3552 if (!dirty_bitmap[i])
3553 continue;
3554
3555 is_dirty = true;
3556
3557 mask = xchg(&dirty_bitmap[i], 0);
3558 dirty_bitmap_buffer[i] = mask;
3559
3560 offset = i * BITS_PER_LONG;
3561 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3562 }
3563 if (is_dirty)
3564 kvm_flush_remote_tlbs(kvm);
3565
3566 spin_unlock(&kvm->mmu_lock);
3567
3568 r = -EFAULT;
3569 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3570 goto out;
3571
3572 r = 0;
3573out:
3574 mutex_unlock(&kvm->slots_lock);
3575 return r;
3576}
3577
3578int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3579 bool line_status)
3580{
3581 if (!irqchip_in_kernel(kvm))
3582 return -ENXIO;
3583
3584 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3585 irq_event->irq, irq_event->level,
3586 line_status);
3587 return 0;
3588}
3589
3590long kvm_arch_vm_ioctl(struct file *filp,
3591 unsigned int ioctl, unsigned long arg)
3592{
3593 struct kvm *kvm = filp->private_data;
3594 void __user *argp = (void __user *)arg;
3595 int r = -ENOTTY;
3596
3597 /*
3598 * The union below makes it explicit that the per-ioctl argument
3599 * buffers share stack space rather than each adding to the frame.
3600 */
3601 union {
3602 struct kvm_pit_state ps;
3603 struct kvm_pit_state2 ps2;
3604 struct kvm_pit_config pit_config;
3605 } u;
3606
3607 switch (ioctl) {
3608 case KVM_SET_TSS_ADDR:
3609 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3610 break;
3611 case KVM_SET_IDENTITY_MAP_ADDR: {
3612 u64 ident_addr;
3613
3614 r = -EFAULT;
3615 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3616 goto out;
3617 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3618 break;
3619 }
3620 case KVM_SET_NR_MMU_PAGES:
3621 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3622 break;
3623 case KVM_GET_NR_MMU_PAGES:
3624 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3625 break;
3626 case KVM_CREATE_IRQCHIP: {
3627 struct kvm_pic *vpic;
3628
3629 mutex_lock(&kvm->lock);
3630 r = -EEXIST;
3631 if (kvm->arch.vpic)
3632 goto create_irqchip_unlock;
3633 r = -EINVAL;
3634 if (atomic_read(&kvm->online_vcpus))
3635 goto create_irqchip_unlock;
3636 r = -ENOMEM;
3637 vpic = kvm_create_pic(kvm);
3638 if (vpic) {
3639 r = kvm_ioapic_init(kvm);
3640 if (r) {
3641 mutex_lock(&kvm->slots_lock);
3642 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3643 &vpic->dev_master);
3644 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3645 &vpic->dev_slave);
3646 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3647 &vpic->dev_eclr);
3648 mutex_unlock(&kvm->slots_lock);
3649 kfree(vpic);
3650 goto create_irqchip_unlock;
3651 }
3652 } else
3653 goto create_irqchip_unlock;
3654 smp_wmb();
3655 kvm->arch.vpic = vpic;
3656 smp_wmb();
3657 r = kvm_setup_default_irq_routing(kvm);
3658 if (r) {
3659 mutex_lock(&kvm->slots_lock);
3660 mutex_lock(&kvm->irq_lock);
3661 kvm_ioapic_destroy(kvm);
3662 kvm_destroy_pic(kvm);
3663 mutex_unlock(&kvm->irq_lock);
3664 mutex_unlock(&kvm->slots_lock);
3665 }
3666 create_irqchip_unlock:
3667 mutex_unlock(&kvm->lock);
3668 break;
3669 }
3670 case KVM_CREATE_PIT:
3671 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3672 goto create_pit;
3673 case KVM_CREATE_PIT2:
3674 r = -EFAULT;
3675 if (copy_from_user(&u.pit_config, argp,
3676 sizeof(struct kvm_pit_config)))
3677 goto out;
3678 create_pit:
3679 mutex_lock(&kvm->slots_lock);
3680 r = -EEXIST;
3681 if (kvm->arch.vpit)
3682 goto create_pit_unlock;
3683 r = -ENOMEM;
3684 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3685 if (kvm->arch.vpit)
3686 r = 0;
3687 create_pit_unlock:
3688 mutex_unlock(&kvm->slots_lock);
3689 break;
3690 case KVM_GET_IRQCHIP: {
3691 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3692 struct kvm_irqchip *chip;
3693
3694 chip = memdup_user(argp, sizeof(*chip));
3695 if (IS_ERR(chip)) {
3696 r = PTR_ERR(chip);
3697 goto out;
3698 }
3699
3700 r = -ENXIO;
3701 if (!irqchip_in_kernel(kvm))
3702 goto get_irqchip_out;
3703 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3704 if (r)
3705 goto get_irqchip_out;
3706 r = -EFAULT;
3707 if (copy_to_user(argp, chip, sizeof *chip))
3708 goto get_irqchip_out;
3709 r = 0;
3710 get_irqchip_out:
3711 kfree(chip);
3712 break;
3713 }
3714 case KVM_SET_IRQCHIP: {
3715 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3716 struct kvm_irqchip *chip;
3717
3718 chip = memdup_user(argp, sizeof(*chip));
3719 if (IS_ERR(chip)) {
3720 r = PTR_ERR(chip);
3721 goto out;
3722 }
3723
3724 r = -ENXIO;
3725 if (!irqchip_in_kernel(kvm))
3726 goto set_irqchip_out;
3727 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3728 if (r)
3729 goto set_irqchip_out;
3730 r = 0;
3731 set_irqchip_out:
3732 kfree(chip);
3733 break;
3734 }
3735 case KVM_GET_PIT: {
3736 r = -EFAULT;
3737 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3738 goto out;
3739 r = -ENXIO;
3740 if (!kvm->arch.vpit)
3741 goto out;
3742 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3743 if (r)
3744 goto out;
3745 r = -EFAULT;
3746 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3747 goto out;
3748 r = 0;
3749 break;
3750 }
3751 case KVM_SET_PIT: {
3752 r = -EFAULT;
3753 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3754 goto out;
3755 r = -ENXIO;
3756 if (!kvm->arch.vpit)
3757 goto out;
3758 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3759 break;
3760 }
3761 case KVM_GET_PIT2: {
3762 r = -ENXIO;
3763 if (!kvm->arch.vpit)
3764 goto out;
3765 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3766 if (r)
3767 goto out;
3768 r = -EFAULT;
3769 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3770 goto out;
3771 r = 0;
3772 break;
3773 }
3774 case KVM_SET_PIT2: {
3775 r = -EFAULT;
3776 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3777 goto out;
3778 r = -ENXIO;
3779 if (!kvm->arch.vpit)
3780 goto out;
3781 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3782 break;
3783 }
3784 case KVM_REINJECT_CONTROL: {
3785 struct kvm_reinject_control control;
3786 r = -EFAULT;
3787 if (copy_from_user(&control, argp, sizeof(control)))
3788 goto out;
3789 r = kvm_vm_ioctl_reinject(kvm, &control);
3790 break;
3791 }
3792 case KVM_XEN_HVM_CONFIG: {
3793 r = -EFAULT;
3794 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3795 sizeof(struct kvm_xen_hvm_config)))
3796 goto out;
3797 r = -EINVAL;
3798 if (kvm->arch.xen_hvm_config.flags)
3799 goto out;
3800 r = 0;
3801 break;
3802 }
3803 case KVM_SET_CLOCK: {
3804 struct kvm_clock_data user_ns;
3805 u64 now_ns;
3806 s64 delta;
3807
3808 r = -EFAULT;
3809 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3810 goto out;
3811
3812 r = -EINVAL;
3813 if (user_ns.flags)
3814 goto out;
3815
3816 r = 0;
3817 local_irq_disable();
3818 now_ns = get_kernel_ns();
3819 delta = user_ns.clock - now_ns;
3820 local_irq_enable();
3821 kvm->arch.kvmclock_offset = delta;
3822 kvm_gen_update_masterclock(kvm);
3823 break;
3824 }
3825 case KVM_GET_CLOCK: {
3826 struct kvm_clock_data user_ns;
3827 u64 now_ns;
3828
3829 local_irq_disable();
3830 now_ns = get_kernel_ns();
3831 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3832 local_irq_enable();
3833 user_ns.flags = 0;
3834 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3835
3836 r = -EFAULT;
3837 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3838 goto out;
3839 r = 0;
3840 break;
3841 }
3842
3843 default:
3844 ;
3845 }
3846out:
3847 return r;
3848}
3849
3850static void kvm_init_msr_list(void)
3851{
3852 u32 dummy[2];
3853 unsigned i, j;
3854
3855 /* skip the first msrs in the list. KVM-specific */
3856 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3857 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3858 continue;
3859 if (j < i)
3860 msrs_to_save[j] = msrs_to_save[i];
3861 j++;
3862 }
3863 num_msrs_to_save = j;
3864}
3865
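/*
 * Note: emulated MMIO writes are dispatched in chunks of at most 8 bytes.
 * Each chunk is offered to the in-kernel local APIC first and then to the
 * devices on the MMIO bus; the return value is the number of bytes handled
 * in the kernel, so a short count tells the caller the rest must go to
 * userspace.
 */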
3866static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3867 const void *v)
3868{
3869 int handled = 0;
3870 int n;
3871
3872 do {
3873 n = min(len, 8);
3874 if (!(vcpu->arch.apic &&
3875 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3876 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3877 break;
3878 handled += n;
3879 addr += n;
3880 len -= n;
3881 v += n;
3882 } while (len);
3883
3884 return handled;
3885}
3886
3887static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3888{
3889 int handled = 0;
3890 int n;
3891
3892 do {
3893 n = min(len, 8);
3894 if (!(vcpu->arch.apic &&
3895 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3896 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3897 break;
3898 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3899 handled += n;
3900 addr += n;
3901 len -= n;
3902 v += n;
3903 } while (len);
3904
3905 return handled;
3906}
3907
3908static void kvm_set_segment(struct kvm_vcpu *vcpu,
3909 struct kvm_segment *var, int seg)
3910{
3911 kvm_x86_ops->set_segment(vcpu, var, seg);
3912}
3913
3914void kvm_get_segment(struct kvm_vcpu *vcpu,
3915 struct kvm_segment *var, int seg)
3916{
3917 kvm_x86_ops->get_segment(vcpu, var, seg);
3918}
3919
3920gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3921{
3922 gpa_t t_gpa;
3923 struct x86_exception exception;
3924
3925 BUG_ON(!mmu_is_nested(vcpu));
3926
3927 /* the nested page table walk is performed as a user-mode access */
3928 access |= PFERR_USER_MASK;
3929 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3930
3931 return t_gpa;
3932}
3933
3934gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3935 struct x86_exception *exception)
3936{
3937 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3938 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3939}
3940
3941gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3942 struct x86_exception *exception)
3943{
3944 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3945 access |= PFERR_FETCH_MASK;
3946 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3947}
3948
3949gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3950 struct x86_exception *exception)
3951{
3952 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3953 access |= PFERR_WRITE_MASK;
3954 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3955}
3956
3957/* uses this to access any guest's mapped memory without checking CPL */
3958gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3959 struct x86_exception *exception)
3960{
3961 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3962}
3963
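/*
 * Note: guest-virtual reads are done one page at a time.  Each iteration
 * translates the gva through the current walk_mmu with the supplied
 * PFERR_* access bits and then copies up to the end of that guest page.
 * A failed translation propagates a fault to the emulator; a failed
 * physical read reports X86EMUL_IO_NEEDED instead.
 */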
3964static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3965 struct kvm_vcpu *vcpu, u32 access,
3966 struct x86_exception *exception)
3967{
3968 void *data = val;
3969 int r = X86EMUL_CONTINUE;
3970
3971 while (bytes) {
3972 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3973 exception);
3974 unsigned offset = addr & (PAGE_SIZE-1);
3975 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3976 int ret;
3977
3978 if (gpa == UNMAPPED_GVA)
3979 return X86EMUL_PROPAGATE_FAULT;
3980 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3981 if (ret < 0) {
3982 r = X86EMUL_IO_NEEDED;
3983 goto out;
3984 }
3985
3986 bytes -= toread;
3987 data += toread;
3988 addr += toread;
3989 }
3990out:
3991 return r;
3992}
3993
3994/* used for instruction fetching */
3995static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3996 gva_t addr, void *val, unsigned int bytes,
3997 struct x86_exception *exception)
3998{
3999 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4000 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4001
4002 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
4003 access | PFERR_FETCH_MASK,
4004 exception);
4005}
4006
4007int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
4008 gva_t addr, void *val, unsigned int bytes,
4009 struct x86_exception *exception)
4010{
4011 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4012 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4013
4014 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4015 exception);
4016}
4017EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4018
4019static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4020 gva_t addr, void *val, unsigned int bytes,
4021 struct x86_exception *exception)
4022{
4023 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4024 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4025}
4026
4027int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4028 gva_t addr, void *val,
4029 unsigned int bytes,
4030 struct x86_exception *exception)
4031{
4032 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4033 void *data = val;
4034 int r = X86EMUL_CONTINUE;
4035
4036 while (bytes) {
4037 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4038 PFERR_WRITE_MASK,
4039 exception);
4040 unsigned offset = addr & (PAGE_SIZE-1);
4041 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4042 int ret;
4043
4044 if (gpa == UNMAPPED_GVA)
4045 return X86EMUL_PROPAGATE_FAULT;
4046 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
4047 if (ret < 0) {
4048 r = X86EMUL_IO_NEEDED;
4049 goto out;
4050 }
4051
4052 bytes -= towrite;
4053 data += towrite;
4054 addr += towrite;
4055 }
4056out:
4057 return r;
4058}
4059EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4060
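/*
 * Note: this translates a gva for an emulated access and classifies the
 * result: -1 if the translation faults, 1 if the gpa must be treated as
 * MMIO (including the APIC access page), 0 for ordinary guest RAM.  A hit
 * in the per-vcpu cached mmio gva/gpa pair skips the page walk entirely.
 */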
4061static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4062 gpa_t *gpa, struct x86_exception *exception,
4063 bool write)
4064{
4065 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
4066 | (write ? PFERR_WRITE_MASK : 0);
4067
4068 if (vcpu_match_mmio_gva(vcpu, gva)
4069 && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
4070 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4071 (gva & (PAGE_SIZE - 1));
4072 trace_vcpu_match_mmio(gva, *gpa, write, false);
4073 return 1;
4074 }
4075
4076 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4077
4078 if (*gpa == UNMAPPED_GVA)
4079 return -1;
4080
4081 /* For APIC access vmexit */
4082 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4083 return 1;
4084
4085 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
4086 trace_vcpu_match_mmio(gva, *gpa, write, true);
4087 return 1;
4088 }
4089
4090 return 0;
4091}
4092
4093int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4094 const void *val, int bytes)
4095{
4096 int ret;
4097
4098 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
4099 if (ret < 0)
4100 return 0;
4101 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
4102 return 1;
4103}
4104
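/*
 * Note: read_write_emulator_ops lets emulator_read_write() share one code
 * path for both directions.  read_write_prepare (reads only) completes a
 * previously requested MMIO read, read_write_emulate accesses ordinary
 * guest memory, read_write_mmio forwards to in-kernel MMIO handlers, and
 * read_write_exit_mmio fills in the KVM_EXIT_MMIO data when the access
 * must be finished in userspace.
 */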
4105struct read_write_emulator_ops {
4106 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4107 int bytes);
4108 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4109 void *val, int bytes);
4110 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4111 int bytes, void *val);
4112 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4113 void *val, int bytes);
4114 bool write;
4115};
4116
4117static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4118{
4119 if (vcpu->mmio_read_completed) {
4120 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4121 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4122 vcpu->mmio_read_completed = 0;
4123 return 1;
4124 }
4125
4126 return 0;
4127}
4128
4129static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4130 void *val, int bytes)
4131{
4132 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
4133}
4134
4135static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4136 void *val, int bytes)
4137{
4138 return emulator_write_phys(vcpu, gpa, val, bytes);
4139}
4140
4141static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4142{
4143 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4144 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4145}
4146
4147static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4148 void *val, int bytes)
4149{
4150 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4151 return X86EMUL_IO_NEEDED;
4152}
4153
4154static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4155 void *val, int bytes)
4156{
4157 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4158
4159 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4160 return X86EMUL_CONTINUE;
4161}
4162
4163static const struct read_write_emulator_ops read_emultor = {
4164 .read_write_prepare = read_prepare,
4165 .read_write_emulate = read_emulate,
4166 .read_write_mmio = vcpu_mmio_read,
4167 .read_write_exit_mmio = read_exit_mmio,
4168};
4169
4170static const struct read_write_emulator_ops write_emultor = {
4171 .read_write_emulate = write_emulate,
4172 .read_write_mmio = write_mmio,
4173 .read_write_exit_mmio = write_exit_mmio,
4174 .write = true,
4175};
4176
4177static int emulator_read_write_onepage(unsigned long addr, void *val,
4178 unsigned int bytes,
4179 struct x86_exception *exception,
4180 struct kvm_vcpu *vcpu,
4181 const struct read_write_emulator_ops *ops)
4182{
4183 gpa_t gpa;
4184 int handled, ret;
4185 bool write = ops->write;
4186 struct kvm_mmio_fragment *frag;
4187
4188 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4189
4190 if (ret < 0)
4191 return X86EMUL_PROPAGATE_FAULT;
4192
4193 /* a positive return value means the access is MMIO */
4194 if (ret)
4195 goto mmio;
4196
4197 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
4198 return X86EMUL_CONTINUE;
4199
4200mmio:
4201 /*
4202 * Is this MMIO handled locally?
4203 */
4204 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4205 if (handled == bytes)
4206 return X86EMUL_CONTINUE;
4207
4208 gpa += handled;
4209 bytes -= handled;
4210 val += handled;
4211
4212 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4213 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4214 frag->gpa = gpa;
4215 frag->data = val;
4216 frag->len = bytes;
4217 return X86EMUL_CONTINUE;
4218}
4219
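/*
 * Note: top-level handler for emulated guest memory accesses.  An access
 * that crosses a page boundary is split and each part handled separately;
 * parts that turn out to be MMIO are queued as mmio_fragments, the first
 * fragment is exported through vcpu->run, and the remaining fragments are
 * completed when the vcpu re-enters after the userspace exit.
 */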
4220int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
4221 void *val, unsigned int bytes,
4222 struct x86_exception *exception,
4223 const struct read_write_emulator_ops *ops)
4224{
4225 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4226 gpa_t gpa;
4227 int rc;
4228
4229 if (ops->read_write_prepare &&
4230 ops->read_write_prepare(vcpu, val, bytes))
4231 return X86EMUL_CONTINUE;
4232
4233 vcpu->mmio_nr_fragments = 0;
4234
4235 /* Crossing a page boundary? */
4236 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4237 int now;
4238
4239 now = -addr & ~PAGE_MASK;
4240 rc = emulator_read_write_onepage(addr, val, now, exception,
4241 vcpu, ops);
4242
4243 if (rc != X86EMUL_CONTINUE)
4244 return rc;
4245 addr += now;
4246 val += now;
4247 bytes -= now;
4248 }
4249
4250 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4251 vcpu, ops);
4252 if (rc != X86EMUL_CONTINUE)
4253 return rc;
4254
4255 if (!vcpu->mmio_nr_fragments)
4256 return rc;
4257
4258 gpa = vcpu->mmio_fragments[0].gpa;
4259
4260 vcpu->mmio_needed = 1;
4261 vcpu->mmio_cur_fragment = 0;
4262
4263 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4264 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4265 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4266 vcpu->run->mmio.phys_addr = gpa;
4267
4268 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4269}
4270
4271static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4272 unsigned long addr,
4273 void *val,
4274 unsigned int bytes,
4275 struct x86_exception *exception)
4276{
4277 return emulator_read_write(ctxt, addr, val, bytes,
4278 exception, &read_emultor);
4279}
4280
4281int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4282 unsigned long addr,
4283 const void *val,
4284 unsigned int bytes,
4285 struct x86_exception *exception)
4286{
4287 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4288 exception, &write_emultor);
4289}
4290
4291#define CMPXCHG_TYPE(t, ptr, old, new) \
4292 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4293
4294#ifdef CONFIG_X86_64
4295# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4296#else
4297# define CMPXCHG64(ptr, old, new) \
4298 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4299#endif
4300
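/*
 * Note: for power-of-two sizes up to 8 bytes that do not cross a page
 * boundary, the guest page is mapped and a real cmpxchg is performed so
 * the update stays atomic with respect to other vcpus.  Anything else
 * falls back to a plain emulated write, which is not atomic (hence the
 * printk_once() warning below).
 */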
4301static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4302 unsigned long addr,
4303 const void *old,
4304 const void *new,
4305 unsigned int bytes,
4306 struct x86_exception *exception)
4307{
4308 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4309 gpa_t gpa;
4310 struct page *page;
4311 char *kaddr;
4312 bool exchanged;
4313
4314 /* guests cmpxchg8b should be emulated atomically */
4315 if (bytes > 8 || (bytes & (bytes - 1)))
4316 goto emul_write;
4317
4318 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4319
4320 if (gpa == UNMAPPED_GVA ||
4321 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4322 goto emul_write;
4323
4324 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4325 goto emul_write;
4326
4327 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4328 if (is_error_page(page))
4329 goto emul_write;
4330
4331 kaddr = kmap_atomic(page);
4332 kaddr += offset_in_page(gpa);
4333 switch (bytes) {
4334 case 1:
4335 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4336 break;
4337 case 2:
4338 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4339 break;
4340 case 4:
4341 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4342 break;
4343 case 8:
4344 exchanged = CMPXCHG64(kaddr, old, new);
4345 break;
4346 default:
4347 BUG();
4348 }
4349 kunmap_atomic(kaddr);
4350 kvm_release_page_dirty(page);
4351
4352 if (!exchanged)
4353 return X86EMUL_CMPXCHG_FAILED;
4354
4355 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4356
4357 return X86EMUL_CONTINUE;
4358
4359emul_write:
4360 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4361
4362 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4363}
4364
4365static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4366{
4367 /* TODO: String I/O for in kernel device */
4368 int r;
4369
4370 if (vcpu->arch.pio.in)
4371 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4372 vcpu->arch.pio.size, pd);
4373 else
4374 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4375 vcpu->arch.pio.port, vcpu->arch.pio.size,
4376 pd);
4377 return r;
4378}
4379
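/*
 * Note: common helper for emulated IN/OUT.  Data is staged in the pio_data
 * page shared with userspace; if an in-kernel device claims the port the
 * access completes immediately (return 1), otherwise a KVM_EXIT_IO exit is
 * prepared and userspace finishes the transaction (return 0).
 */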
4380static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4381 unsigned short port, void *val,
4382 unsigned int count, bool in)
4383{
4384 trace_kvm_pio(!in, port, size, count);
4385
4386 vcpu->arch.pio.port = port;
4387 vcpu->arch.pio.in = in;
4388 vcpu->arch.pio.count = count;
4389 vcpu->arch.pio.size = size;
4390
4391 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4392 vcpu->arch.pio.count = 0;
4393 return 1;
4394 }
4395
4396 vcpu->run->exit_reason = KVM_EXIT_IO;
4397 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4398 vcpu->run->io.size = size;
4399 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4400 vcpu->run->io.count = count;
4401 vcpu->run->io.port = port;
4402
4403 return 0;
4404}
4405
4406static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4407 int size, unsigned short port, void *val,
4408 unsigned int count)
4409{
4410 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4411 int ret;
4412
4413 if (vcpu->arch.pio.count)
4414 goto data_avail;
4415
4416 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4417 if (ret) {
4418data_avail:
4419 memcpy(val, vcpu->arch.pio_data, size * count);
4420 vcpu->arch.pio.count = 0;
4421 return 1;
4422 }
4423
4424 return 0;
4425}
4426
4427static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4428 int size, unsigned short port,
4429 const void *val, unsigned int count)
4430{
4431 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4432
4433 memcpy(vcpu->arch.pio_data, val, size * count);
4434 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4435}
4436
4437static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4438{
4439 return kvm_x86_ops->get_segment_base(vcpu, seg);
4440}
4441
4442static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4443{
4444 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4445}
4446
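/*
 * Note: WBINVD only needs real work for guests with assigned devices and
 * a non-coherent IOMMU (see need_emulate_wbinvd).  With WBINVD exits
 * available, the flush is broadcast by IPI to every physical CPU recorded
 * in wbinvd_dirty_mask; otherwise the instruction simply runs on the
 * current CPU.
 */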
4447int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4448{
4449 if (!need_emulate_wbinvd(vcpu))
4450 return X86EMUL_CONTINUE;
4451
4452 if (kvm_x86_ops->has_wbinvd_exit()) {
4453 int cpu = get_cpu();
4454
4455 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4456 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4457 wbinvd_ipi, NULL, 1);
4458 put_cpu();
4459 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4460 } else
4461 wbinvd();
4462 return X86EMUL_CONTINUE;
4463}
4464EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4465
4466static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4467{
4468 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4469}
4470
4471int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4472{
4473 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4474}
4475
4476int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4477{
4478
4479 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4480}
4481
4482static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4483{
4484 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4485}
4486
4487static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4488{
4489 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4490 unsigned long value;
4491
4492 switch (cr) {
4493 case 0:
4494 value = kvm_read_cr0(vcpu);
4495 break;
4496 case 2:
4497 value = vcpu->arch.cr2;
4498 break;
4499 case 3:
4500 value = kvm_read_cr3(vcpu);
4501 break;
4502 case 4:
4503 value = kvm_read_cr4(vcpu);
4504 break;
4505 case 8:
4506 value = kvm_get_cr8(vcpu);
4507 break;
4508 default:
4509 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4510 return 0;
4511 }
4512
4513 return value;
4514}
4515
4516static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4517{
4518 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4519 int res = 0;
4520
4521 switch (cr) {
4522 case 0:
4523 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4524 break;
4525 case 2:
4526 vcpu->arch.cr2 = val;
4527 break;
4528 case 3:
4529 res = kvm_set_cr3(vcpu, val);
4530 break;
4531 case 4:
4532 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4533 break;
4534 case 8:
4535 res = kvm_set_cr8(vcpu, val);
4536 break;
4537 default:
4538 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4539 res = -1;
4540 }
4541
4542 return res;
4543}
4544
4545static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4546{
4547 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4548}
4549
4550static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4551{
4552 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4553}
4554
4555static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4556{
4557 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4558}
4559
4560static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4561{
4562 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4563}
4564
4565static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4566{
4567 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4568}
4569
4570static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4571{
4572 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4573}
4574
4575static unsigned long emulator_get_cached_segment_base(
4576 struct x86_emulate_ctxt *ctxt, int seg)
4577{
4578 return get_segment_base(emul_to_vcpu(ctxt), seg);
4579}
4580
4581static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4582 struct desc_struct *desc, u32 *base3,
4583 int seg)
4584{
4585 struct kvm_segment var;
4586
4587 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4588 *selector = var.selector;
4589
4590 if (var.unusable) {
4591 memset(desc, 0, sizeof(*desc));
4592 return false;
4593 }
4594
4595 if (var.g)
4596 var.limit >>= 12;
4597 set_desc_limit(desc, var.limit);
4598 set_desc_base(desc, (unsigned long)var.base);
4599#ifdef CONFIG_X86_64
4600 if (base3)
4601 *base3 = var.base >> 32;
4602#endif
4603 desc->type = var.type;
4604 desc->s = var.s;
4605 desc->dpl = var.dpl;
4606 desc->p = var.present;
4607 desc->avl = var.avl;
4608 desc->l = var.l;
4609 desc->d = var.db;
4610 desc->g = var.g;
4611
4612 return true;
4613}
4614
4615static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4616 struct desc_struct *desc, u32 base3,
4617 int seg)
4618{
4619 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4620 struct kvm_segment var;
4621
4622 var.selector = selector;
4623 var.base = get_desc_base(desc);
4624#ifdef CONFIG_X86_64
4625 var.base |= ((u64)base3) << 32;
4626#endif
4627 var.limit = get_desc_limit(desc);
4628 if (desc->g)
4629 var.limit = (var.limit << 12) | 0xfff;
4630 var.type = desc->type;
4632 var.dpl = desc->dpl;
4633 var.db = desc->d;
4634 var.s = desc->s;
4635 var.l = desc->l;
4636 var.g = desc->g;
4637 var.avl = desc->avl;
4638 var.present = desc->p;
4639 var.unusable = !var.present;
4640 var.padding = 0;
4641
4642 kvm_set_segment(vcpu, &var, seg);
4643 return;
4644}
4645
4646static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4647 u32 msr_index, u64 *pdata)
4648{
4649 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4650}
4651
4652static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4653 u32 msr_index, u64 data)
4654{
4655 struct msr_data msr;
4656
4657 msr.data = data;
4658 msr.index = msr_index;
4659 msr.host_initiated = false;
4660 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4661}
4662
4663static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4664 u32 pmc, u64 *pdata)
4665{
4666 return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
4667}
4668
4669static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4670{
4671 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4672}
4673
4674static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4675{
4676 preempt_disable();
4677 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
4678
4679	/* CR0.TS may reference the host fpu state, not the guest fpu state,
4680	 * so it may be clear at this point.
4681	 */
4682 clts();
4683}
4684
4685static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4686{
4687 preempt_enable();
4688}
4689
4690static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4691 struct x86_instruction_info *info,
4692 enum x86_intercept_stage stage)
4693{
4694 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4695}
4696
4697static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4698 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4699{
4700 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4701}
4702
4703static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4704{
4705 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4706}
4707
4708static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4709{
4710 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4711}
4712
4713static const struct x86_emulate_ops emulate_ops = {
4714 .read_gpr = emulator_read_gpr,
4715 .write_gpr = emulator_write_gpr,
4716 .read_std = kvm_read_guest_virt_system,
4717 .write_std = kvm_write_guest_virt_system,
4718 .fetch = kvm_fetch_guest_virt,
4719 .read_emulated = emulator_read_emulated,
4720 .write_emulated = emulator_write_emulated,
4721 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4722 .invlpg = emulator_invlpg,
4723 .pio_in_emulated = emulator_pio_in_emulated,
4724 .pio_out_emulated = emulator_pio_out_emulated,
4725 .get_segment = emulator_get_segment,
4726 .set_segment = emulator_set_segment,
4727 .get_cached_segment_base = emulator_get_cached_segment_base,
4728 .get_gdt = emulator_get_gdt,
4729 .get_idt = emulator_get_idt,
4730 .set_gdt = emulator_set_gdt,
4731 .set_idt = emulator_set_idt,
4732 .get_cr = emulator_get_cr,
4733 .set_cr = emulator_set_cr,
4734 .set_rflags = emulator_set_rflags,
4735 .cpl = emulator_get_cpl,
4736 .get_dr = emulator_get_dr,
4737 .set_dr = emulator_set_dr,
4738 .set_msr = emulator_set_msr,
4739 .get_msr = emulator_get_msr,
4740 .read_pmc = emulator_read_pmc,
4741 .halt = emulator_halt,
4742 .wbinvd = emulator_wbinvd,
4743 .fix_hypercall = emulator_fix_hypercall,
4744 .get_fpu = emulator_get_fpu,
4745 .put_fpu = emulator_put_fpu,
4746 .intercept = emulator_intercept,
4747 .get_cpuid = emulator_get_cpuid,
4748};
4749
4750static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4751{
4752 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4753	/*
4754	 * An "sti; sti" sequence disables interrupts only for the first
4755	 * instruction, so if the last instruction, emulated or not, left
4756	 * the interrupt shadow (STI blocking) set, that instruction was an
4757	 * sti and the existing shadow must be left intact instead of being
4758	 * re-armed here.
4759	 */
4760 if (!(int_shadow & mask))
4761 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4762}
4763
4764static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4765{
4766 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4767 if (ctxt->exception.vector == PF_VECTOR)
4768 kvm_propagate_fault(vcpu, &ctxt->exception);
4769 else if (ctxt->exception.error_code_valid)
4770 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4771 ctxt->exception.error_code);
4772 else
4773 kvm_queue_exception(vcpu, ctxt->exception.vector);
4774}
4775
4776static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4777{
4778 memset(&ctxt->twobyte, 0,
4779 (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
4780
4781 ctxt->fetch.start = 0;
4782 ctxt->fetch.end = 0;
4783 ctxt->io_read.pos = 0;
4784 ctxt->io_read.end = 0;
4785 ctxt->mem_read.pos = 0;
4786 ctxt->mem_read.end = 0;
4787}
4788
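/*
 * Descriptive note: init_emulate_ctxt() seeds the instruction-emulation
 * context from current vcpu state: rflags, rip, the CPU mode derived from
 * CR0.PE, EFLAGS.VM and the CS D/L bits, and nested-guest status.  The
 * decode cache is reset as well.
 */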
4789static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4790{
4791 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4792 int cs_db, cs_l;
4793
4794 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4795
4796 ctxt->eflags = kvm_get_rflags(vcpu);
4797 ctxt->eip = kvm_rip_read(vcpu);
4798 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4799 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4800 cs_l ? X86EMUL_MODE_PROT64 :
4801 cs_db ? X86EMUL_MODE_PROT32 :
4802 X86EMUL_MODE_PROT16;
4803 ctxt->guest_mode = is_guest_mode(vcpu);
4804
4805 init_decode_cache(ctxt);
4806 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4807}
4808
4809int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4810{
4811 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4812 int ret;
4813
4814 init_emulate_ctxt(vcpu);
4815
4816 ctxt->op_bytes = 2;
4817 ctxt->ad_bytes = 2;
4818 ctxt->_eip = ctxt->eip + inc_eip;
4819 ret = emulate_int_real(ctxt, irq);
4820
4821 if (ret != X86EMUL_CONTINUE)
4822 return EMULATE_FAIL;
4823
4824 ctxt->eip = ctxt->_eip;
4825 kvm_rip_write(vcpu, ctxt->eip);
4826 kvm_set_rflags(vcpu, ctxt->eflags);
4827
4828 if (irq == NMI_VECTOR)
4829 vcpu->arch.nmi_pending = 0;
4830 else
4831 vcpu->arch.interrupt.pending = false;
4832
4833 return EMULATE_DONE;
4834}
4835EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4836
4837static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4838{
4839 int r = EMULATE_DONE;
4840
4841 ++vcpu->stat.insn_emulation_fail;
4842 trace_kvm_emulate_insn_failed(vcpu);
4843 if (!is_guest_mode(vcpu)) {
4844 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4845 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4846 vcpu->run->internal.ndata = 0;
4847 r = EMULATE_FAIL;
4848 }
4849 kvm_queue_exception(vcpu, UD_VECTOR);
4850
4851 return r;
4852}
4853
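/*
 * Descriptive note: reexecute_instruction() decides whether a faulting
 * instruction should be re-executed by the guest instead of being emulated,
 * typically because the fault was caused by write protection of shadowed
 * guest page tables that can simply be unprotected.  Returns true when the
 * guest should just retry the instruction.
 */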
4854static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4855 bool write_fault_to_shadow_pgtable,
4856 int emulation_type)
4857{
4858 gpa_t gpa = cr2;
4859 pfn_t pfn;
4860
4861 if (emulation_type & EMULTYPE_NO_REEXECUTE)
4862 return false;
4863
4864 if (!vcpu->arch.mmu.direct_map) {
4865		/*
4866		 * Only write access needs to be emulated here, so translate
4867		 * the gva with write permission.
4868		 */
4869 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4870
4871		/*
4872		 * If the mapping is invalid in the guest, let the CPU retry
4873		 * it and generate the fault itself.
4874		 */
4875 if (gpa == UNMAPPED_GVA)
4876 return true;
4877 }
4878
4879	/*
4880	 * Do not retry an unhandleable instruction if it faults on
4881	 * read-only host memory; otherwise it would fault forever and
4882	 * keep coming back to the emulator.
4883	 */
4885 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4886	/*
4887	 * If the instruction faulted on an error pfn, it cannot be fixed
4888	 * by unprotecting pages; report the failure to userspace.
4889	 */
4891 if (is_error_noslot_pfn(pfn))
4892 return false;
4893
4894 kvm_release_pfn_clean(pfn);
4895
4896	/* Instructions are well emulated on the direct mmu. */
4897 if (vcpu->arch.mmu.direct_map) {
4898 unsigned int indirect_shadow_pages;
4899
4900 spin_lock(&vcpu->kvm->mmu_lock);
4901 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
4902 spin_unlock(&vcpu->kvm->mmu_lock);
4903
4904 if (indirect_shadow_pages)
4905 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4906
4907 return true;
4908 }
4909
4910	/*
4911	 * The emulation failed on an access to a shadowed page table:
4912	 * unshadow the page and re-enter the guest so the CPU can execute
4913	 * the instruction itself.
4914	 */
4915 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4916
4917	/*
4918	 * If the access faults on its own page table, it cannot be fixed
4919	 * by unprotecting a shadow page, so report it to userspace.
4920	 */
4922 return !write_fault_to_shadow_pgtable;
4923}
4924
4925static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4926 unsigned long cr2, int emulation_type)
4927{
4928 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4929 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
4930
4931 last_retry_eip = vcpu->arch.last_retry_eip;
4932 last_retry_addr = vcpu->arch.last_retry_addr;
4933
4934	/*
4935	 * If the emulation was triggered by a #PF on an instruction that does
4936	 * not write page tables, the VM exit was caused by shadow-page write
4937	 * protection: zap the shadow page and retry the instruction directly
4938	 * in the guest.
4939	 *
4940	 * Note: if the guest uses a non-page-table-modifying instruction on a
4941	 * PDE that maps the instruction itself, the instruction is unmapped
4942	 * and the page-fault path is taken again.
4943	 *
4944	 * If the retried instruction faults once more, the guess was wrong
4945	 * and we go back to the emulator.
4946	 */
4947 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
4948
4949 if (!(emulation_type & EMULTYPE_RETRY))
4950 return false;
4951
4952 if (x86_page_table_writing_insn(ctxt))
4953 return false;
4954
4955 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
4956 return false;
4957
4958 vcpu->arch.last_retry_eip = ctxt->eip;
4959 vcpu->arch.last_retry_addr = cr2;
4960
4961 if (!vcpu->arch.mmu.direct_map)
4962 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4963
4964 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
4965
4966 return true;
4967}
4968
4969static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
4970static int complete_emulated_pio(struct kvm_vcpu *vcpu);
4971
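/*
 * Descriptive note: kvm_vcpu_check_hw_bp() scans the DR7 enable and
 * R/W-length fields and returns a DR6-style bitmask of the hardware
 * breakpoints (db[0..3]) whose address and access type match.
 */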
4972static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
4973 unsigned long *db)
4974{
4975 u32 dr6 = 0;
4976 int i;
4977 u32 enable, rwlen;
4978
4979 enable = dr7;
4980 rwlen = dr7 >> 16;
4981 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
4982 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
4983 dr6 |= (1 << i);
4984 return dr6;
4985}
4986
4987static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
4988{
4989 struct kvm_run *kvm_run = vcpu->run;
4990
4991	/*
4992	 * rflags here is the old, "raw" value of the flags; the new value
4993	 * has not been saved yet.
4994	 *
4995	 * This is correct even for TF set by the guest, because "the
4996	 * processor will not generate this exception after the instruction
4997	 * that sets the TF flag".
4998	 */
4999 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5000
5001 if (unlikely(rflags & X86_EFLAGS_TF)) {
5002 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5003 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
5004 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5005 kvm_run->debug.arch.exception = DB_VECTOR;
5006 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5007 *r = EMULATE_USER_EXIT;
5008 } else {
5009 vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
5010			/*
5011			 * "Certain debug exceptions may clear bits 0-3.  The
5012			 * remaining contents of the DR6 register are never
5013			 * cleared by the processor."
5014			 */
5015 vcpu->arch.dr6 &= ~15;
5016 vcpu->arch.dr6 |= DR6_BS;
5017 kvm_queue_exception(vcpu, DB_VECTOR);
5018 }
5019 }
5020}
5021
5022static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5023{
5024 struct kvm_run *kvm_run = vcpu->run;
5025 unsigned long eip = vcpu->arch.emulate_ctxt.eip;
5026 u32 dr6 = 0;
5027
5028 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
5029 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
5030 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5031 vcpu->arch.guest_debug_dr7,
5032 vcpu->arch.eff_db);
5033
5034 if (dr6 != 0) {
5035 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
5036 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
5037 get_segment_base(vcpu, VCPU_SREG_CS);
5038
5039 kvm_run->debug.arch.exception = DB_VECTOR;
5040 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5041 *r = EMULATE_USER_EXIT;
5042 return true;
5043 }
5044 }
5045
5046 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) {
5047 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5048 vcpu->arch.dr7,
5049 vcpu->arch.db);
5050
5051 if (dr6 != 0) {
5052 vcpu->arch.dr6 &= ~15;
5053 vcpu->arch.dr6 |= dr6;
5054 kvm_queue_exception(vcpu, DB_VECTOR);
5055 *r = EMULATE_DONE;
5056 return true;
5057 }
5058 }
5059
5060 return false;
5061}
5062
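/*
 * Descriptive note: x86_emulate_instruction() is the main entry point for
 * instruction emulation.  It returns EMULATE_DONE when emulation finished
 * in the kernel, EMULATE_USER_EXIT when userspace must complete an
 * MMIO/PIO access or handle a debug exit, and EMULATE_FAIL when the
 * instruction could not be handled at all.
 */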
5063int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5064 unsigned long cr2,
5065 int emulation_type,
5066 void *insn,
5067 int insn_len)
5068{
5069 int r;
5070 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5071 bool writeback = true;
5072 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
5073
5074	/*
5075	 * Clear write_fault_to_shadow_pgtable here to ensure the captured
5076	 * value is never reused on a later emulation.
5077	 */
5078 vcpu->arch.write_fault_to_shadow_pgtable = false;
5079 kvm_clear_exception_queue(vcpu);
5080
5081 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
5082 init_emulate_ctxt(vcpu);
5083
5084		/*
5085		 * If a breakpoint fires here, we re-enter on the same
5086		 * instruction, since complete_userspace_io is not set.
5087		 * Watchpoints are not handled here; those are raised from
5088		 * the emulator callbacks.
5089		 */
5090 if (kvm_vcpu_check_breakpoint(vcpu, &r))
5091 return r;
5092
5093 ctxt->interruptibility = 0;
5094 ctxt->have_exception = false;
5095 ctxt->perm_ok = false;
5096
5097 ctxt->only_vendor_specific_insn
5098 = emulation_type & EMULTYPE_TRAP_UD;
5099
5100 r = x86_decode_insn(ctxt, insn, insn_len);
5101
5102 trace_kvm_emulate_insn_start(vcpu);
5103 ++vcpu->stat.insn_emulation;
5104 if (r != EMULATION_OK) {
5105 if (emulation_type & EMULTYPE_TRAP_UD)
5106 return EMULATE_FAIL;
5107 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5108 emulation_type))
5109 return EMULATE_DONE;
5110 if (emulation_type & EMULTYPE_SKIP)
5111 return EMULATE_FAIL;
5112 return handle_emulation_failure(vcpu);
5113 }
5114 }
5115
5116 if (emulation_type & EMULTYPE_SKIP) {
5117 kvm_rip_write(vcpu, ctxt->_eip);
5118 return EMULATE_DONE;
5119 }
5120
5121 if (retry_instruction(ctxt, cr2, emulation_type))
5122 return EMULATE_DONE;
5123
5124	/* This is needed for the vmware backdoor interface to work, since it
5125	 * changes register values during an IO operation. */
5126 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
5127 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5128 emulator_invalidate_register_cache(ctxt);
5129 }
5130
5131restart:
5132 r = x86_emulate_insn(ctxt);
5133
5134 if (r == EMULATION_INTERCEPTED)
5135 return EMULATE_DONE;
5136
5137 if (r == EMULATION_FAILED) {
5138 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5139 emulation_type))
5140 return EMULATE_DONE;
5141
5142 return handle_emulation_failure(vcpu);
5143 }
5144
5145 if (ctxt->have_exception) {
5146 inject_emulated_exception(vcpu);
5147 r = EMULATE_DONE;
5148 } else if (vcpu->arch.pio.count) {
5149 if (!vcpu->arch.pio.in) {
5150
5151 vcpu->arch.pio.count = 0;
5152 } else {
5153 writeback = false;
5154 vcpu->arch.complete_userspace_io = complete_emulated_pio;
5155 }
5156 r = EMULATE_USER_EXIT;
5157 } else if (vcpu->mmio_needed) {
5158 if (!vcpu->mmio_is_write)
5159 writeback = false;
5160 r = EMULATE_USER_EXIT;
5161 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5162 } else if (r == EMULATION_RESTART)
5163 goto restart;
5164 else
5165 r = EMULATE_DONE;
5166
5167 if (writeback) {
5168 toggle_interruptibility(vcpu, ctxt->interruptibility);
5169 kvm_make_request(KVM_REQ_EVENT, vcpu);
5170 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5171 kvm_rip_write(vcpu, ctxt->eip);
5172 if (r == EMULATE_DONE)
5173 kvm_vcpu_check_singlestep(vcpu, &r);
5174 kvm_set_rflags(vcpu, ctxt->eflags);
5175 } else
5176 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5177
5178 return r;
5179}
5180EXPORT_SYMBOL_GPL(x86_emulate_instruction);
5181
5182int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
5183{
5184 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
5185 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
5186 size, port, &val, 1);
5187
5188 vcpu->arch.pio.count = 0;
5189 return ret;
5190}
5191EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
5192
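/*
 * Descriptive note: per-cpu TSC frequency bookkeeping used by kvmclock.
 * tsc_bad() marks a CPU's TSC rate as unknown (0) from the hotplug
 * notifier on CPU_DOWN_PREPARE; tsc_khz_changed() records the new rate
 * from a cpufreq transition, or falls back to cpufreq_quick_get()/tsc_khz
 * when no transition data is available.
 */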
5193static void tsc_bad(void *info)
5194{
5195 __this_cpu_write(cpu_tsc_khz, 0);
5196}
5197
5198static void tsc_khz_changed(void *data)
5199{
5200 struct cpufreq_freqs *freq = data;
5201 unsigned long khz = 0;
5202
5203 if (data)
5204 khz = freq->new;
5205 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5206 khz = cpufreq_quick_get(raw_smp_processor_id());
5207 if (!khz)
5208 khz = tsc_khz;
5209 __this_cpu_write(cpu_tsc_khz, khz);
5210}
5211
5212static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
5213 void *data)
5214{
5215 struct cpufreq_freqs *freq = data;
5216 struct kvm *kvm;
5217 struct kvm_vcpu *vcpu;
5218 int i, send_ipi = 0;
5219
5220	/*
5221	 * Guests may run temporarily on a slowing clock provided they are
5222	 * notified afterwards, and on an accelerating clock provided they are
5223	 * notified beforehand, so that guest time never appears to go
5224	 * backwards.
5225	 *
5226	 * The frequency of a given CPU cannot be updated atomically from this
5227	 * notifier, which may run on any CPU, and cpufreq transitions can
5228	 * race with CPU hotplug.  All updates of the per-cpu cpu_tsc_khz
5229	 * variable are therefore performed from an IPI on the target CPU, and
5230	 * callers wait for that synchronous IPI to complete, which
5231	 * establishes a total order on the updates.
5232	 *
5233	 * Because a guest time update may happen at any point after a VCPU's
5234	 * request bit is set, the correct TSC value must be in place before
5235	 * the request is raised.
5236	 */
5259 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5260 return 0;
5261 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5262 return 0;
5263
5264 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5265
5266 raw_spin_lock(&kvm_lock);
5267 list_for_each_entry(kvm, &vm_list, vm_list) {
5268 kvm_for_each_vcpu(i, vcpu, kvm) {
5269 if (vcpu->cpu != freq->cpu)
5270 continue;
5271 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5272 if (vcpu->cpu != smp_processor_id())
5273 send_ipi = 1;
5274 }
5275 }
5276 raw_spin_unlock(&kvm_lock);
5277
5278 if (freq->old < freq->new && send_ipi) {
5279		/*
5280		 * The frequency is being raised.  The guest must not see old
5281		 * kvmclock values while already running at the new frequency,
5282		 * or guest time could appear to go backwards.
5283		 *
5284		 * If the frequency is being updated for another CPU, which may
5285		 * be in guest context right now, send it an IPI to kick it out
5286		 * of the guest; kvmclock is refreshed on the next guest entry,
5287		 * so the guest never sees stale values.
5288		 */
5291 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5292 }
5293 return 0;
5294}
5295
5296static struct notifier_block kvmclock_cpufreq_notifier_block = {
5297 .notifier_call = kvmclock_cpufreq_notifier
5298};
5299
5300static int kvmclock_cpu_notifier(struct notifier_block *nfb,
5301 unsigned long action, void *hcpu)
5302{
5303 unsigned int cpu = (unsigned long)hcpu;
5304
5305 switch (action) {
5306 case CPU_ONLINE:
5307 case CPU_DOWN_FAILED:
5308 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5309 break;
5310 case CPU_DOWN_PREPARE:
5311 smp_call_function_single(cpu, tsc_bad, NULL, 1);
5312 break;
5313 }
5314 return NOTIFY_OK;
5315}
5316
5317static struct notifier_block kvmclock_cpu_notifier_block = {
5318 .notifier_call = kvmclock_cpu_notifier,
5319 .priority = -INT_MAX
5320};
5321
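/*
 * Descriptive note: kvm_timer_init() registers the CPU-hotplug and cpufreq
 * notifiers above and initializes the per-cpu cpu_tsc_khz values.  When the
 * TSC is not constant, max_tsc_khz is taken from the cpufreq policy maximum,
 * since the guest may later run at that higher rate.
 */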
5322static void kvm_timer_init(void)
5323{
5324 int cpu;
5325
5326 max_tsc_khz = tsc_khz;
5327 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5328 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5329#ifdef CONFIG_CPU_FREQ
5330 struct cpufreq_policy policy;
5331 memset(&policy, 0, sizeof(policy));
5332 cpu = get_cpu();
5333 cpufreq_get_policy(&policy, cpu);
5334 if (policy.cpuinfo.max_freq)
5335 max_tsc_khz = policy.cpuinfo.max_freq;
5336 put_cpu();
5337#endif
5338 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5339 CPUFREQ_TRANSITION_NOTIFIER);
5340 }
5341 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5342 for_each_online_cpu(cpu)
5343 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5344}
5345
5346static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5347
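/*
 * Descriptive note: perf guest callbacks.  current_vcpu is set around NMI
 * handling (see kvm_before_handle_nmi()/kvm_after_handle_nmi() below) so
 * that perf can attribute PMU NMI samples to guest context and fetch the
 * guest RIP and CPL for them.
 */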
5348int kvm_is_in_guest(void)
5349{
5350 return __this_cpu_read(current_vcpu) != NULL;
5351}
5352
5353static int kvm_is_user_mode(void)
5354{
5355 int user_mode = 3;
5356
5357 if (__this_cpu_read(current_vcpu))
5358 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
5359
5360 return user_mode != 0;
5361}
5362
5363static unsigned long kvm_get_guest_ip(void)
5364{
5365 unsigned long ip = 0;
5366
5367 if (__this_cpu_read(current_vcpu))
5368 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
5369
5370 return ip;
5371}
5372
5373static struct perf_guest_info_callbacks kvm_guest_cbs = {
5374 .is_in_guest = kvm_is_in_guest,
5375 .is_user_mode = kvm_is_user_mode,
5376 .get_guest_ip = kvm_get_guest_ip,
5377};
5378
5379void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5380{
5381 __this_cpu_write(current_vcpu, vcpu);
5382}
5383EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5384
5385void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5386{
5387 __this_cpu_write(current_vcpu, NULL);
5388}
5389EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5390
5391static void kvm_set_mmio_spte_mask(void)
5392{
5393 u64 mask;
5394 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5395
5396	/*
5397	 * Set the reserved bits and the present bit of a paging-structure
5398	 * entry to generate a page fault with PFEC.RSVD = 1.
5399	 */
5400	/* Mask out the reserved physical address bits. */
5401 mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
5402
5403	/* Bit 62 is always reserved for 32bit host. */
5404 mask |= 0x3ull << 62;
5405
5406	/* Set the present bit. */
5407 mask |= 1ull;
5408
5409#ifdef CONFIG_X86_64
5410	/*
5411	 * If no reserved bit is available (maxphyaddr == 52), clear the
5412	 * present bit instead, disabling the MMIO page-fault optimization.
5413	 */
5414 if (maxphyaddr == 52)
5415 mask &= ~1ull;
5416#endif
5417
5418 kvm_mmu_set_mmio_spte_mask(mask);
5419}
5420
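/*
 * Descriptive note: when the host timekeeper stops using a TSC-based
 * clocksource while guests rely on the masterclock, every such guest must
 * recompute kvmclock.  The notifier below queues pvclock_gtod_work, which
 * raises KVM_REQ_MASTERCLOCK_UPDATE on all vcpus of all VMs.
 */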
5421#ifdef CONFIG_X86_64
5422static void pvclock_gtod_update_fn(struct work_struct *work)
5423{
5424 struct kvm *kvm;
5425
5426 struct kvm_vcpu *vcpu;
5427 int i;
5428
5429 raw_spin_lock(&kvm_lock);
5430 list_for_each_entry(kvm, &vm_list, vm_list)
5431 kvm_for_each_vcpu(i, vcpu, kvm)
5432 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5433 atomic_set(&kvm_guest_has_master_clock, 0);
5434 raw_spin_unlock(&kvm_lock);
5435}
5436
5437static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5438
5439/*
5440 * Notification about a pvclock gtod (timekeeping) data update.
5441 */
5442static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5443 void *priv)
5444{
5445 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5446 struct timekeeper *tk = priv;
5447
5448 update_pvclock_gtod(tk);
5449
5450	/* Disable the master clock if the host does not trust, or does
5451	 * not use, the TSC clocksource.
5452	 */
5453 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5454 atomic_read(&kvm_guest_has_master_clock) != 0)
5455 queue_work(system_long_wq, &pvclock_gtod_work);
5456
5457 return 0;
5458}
5459
5460static struct notifier_block pvclock_gtod_notifier = {
5461 .notifier_call = pvclock_gtod_notify,
5462};
5463#endif
5464
5465int kvm_arch_init(void *opaque)
5466{
5467 int r;
5468 struct kvm_x86_ops *ops = opaque;
5469
5470 if (kvm_x86_ops) {
5471 printk(KERN_ERR "kvm: already loaded the other module\n");
5472 r = -EEXIST;
5473 goto out;
5474 }
5475
5476 if (!ops->cpu_has_kvm_support()) {
5477 printk(KERN_ERR "kvm: no hardware support\n");
5478 r = -EOPNOTSUPP;
5479 goto out;
5480 }
5481 if (ops->disabled_by_bios()) {
5482 printk(KERN_ERR "kvm: disabled by bios\n");
5483 r = -EOPNOTSUPP;
5484 goto out;
5485 }
5486
5487 r = -ENOMEM;
5488 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5489 if (!shared_msrs) {
5490 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5491 goto out;
5492 }
5493
5494 r = kvm_mmu_module_init();
5495 if (r)
5496 goto out_free_percpu;
5497
5498 kvm_set_mmio_spte_mask();
5499 kvm_init_msr_list();
5500
5501 kvm_x86_ops = ops;
5502 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5503 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5504
5505 kvm_timer_init();
5506
5507 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5508
5509 if (cpu_has_xsave)
5510 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5511
5512 kvm_lapic_init();
5513#ifdef CONFIG_X86_64
5514 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5515#endif
5516
5517 return 0;
5518
5519out_free_percpu:
5520 free_percpu(shared_msrs);
5521out:
5522 return r;
5523}
5524
5525void kvm_arch_exit(void)
5526{
5527 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5528
5529 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5530 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5531 CPUFREQ_TRANSITION_NOTIFIER);
5532 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5533#ifdef CONFIG_X86_64
5534 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5535#endif
5536 kvm_x86_ops = NULL;
5537 kvm_mmu_module_exit();
5538 free_percpu(shared_msrs);
5539}
5540
5541int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5542{
5543 ++vcpu->stat.halt_exits;
5544 if (irqchip_in_kernel(vcpu->kvm)) {
5545 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5546 return 1;
5547 } else {
5548 vcpu->run->exit_reason = KVM_EXIT_HLT;
5549 return 0;
5550 }
5551}
5552EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5553
5554int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5555{
5556 u64 param, ingpa, outgpa, ret;
5557 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5558 bool fast, longmode;
5559 int cs_db, cs_l;
5560
5561	/*
5562	 * Per the Hyper-V spec, a hypercall from non-zero CPL or from real
5563	 * mode generates #UD.
5564	 */
5565 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5566 kvm_queue_exception(vcpu, UD_VECTOR);
5567 return 0;
5568 }
5569
5570 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5571 longmode = is_long_mode(vcpu) && cs_l == 1;
5572
5573 if (!longmode) {
5574 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5575 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5576 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5577 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5578 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5579 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5580 }
5581#ifdef CONFIG_X86_64
5582 else {
5583 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5584 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5585 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5586 }
5587#endif
5588
5589 code = param & 0xffff;
5590 fast = (param >> 16) & 0x1;
5591 rep_cnt = (param >> 32) & 0xfff;
5592 rep_idx = (param >> 48) & 0xfff;
5593
5594 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5595
5596 switch (code) {
5597 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5598 kvm_vcpu_on_spin(vcpu);
5599 break;
5600 default:
5601 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5602 break;
5603 }
5604
5605 ret = res | (((u64)rep_done & 0xfff) << 32);
5606 if (longmode) {
5607 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5608 } else {
5609 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5610 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5611 }
5612
5613 return 1;
5614}
5615
5616/*
5617 * kvm_pv_kick_cpu_op: Kick a vcpu.
5618 *
5619 * @apicid - apicid of the vcpu to be kicked.
5620 */
5621static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
5622{
5623 struct kvm_lapic_irq lapic_irq;
5624
5625 lapic_irq.shorthand = 0;
5626 lapic_irq.dest_mode = 0;
5627 lapic_irq.dest_id = apicid;
5628
5629 lapic_irq.delivery_mode = APIC_DM_REMRD;
5630 kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
5631}
5632
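/*
 * Descriptive note: KVM hypercall ABI, as handled below.  The hypercall
 * number is passed in RAX and up to four arguments in RBX, RCX, RDX and
 * RSI; the return value is written back to RAX.  For guests outside long
 * mode the inputs are truncated to 32 bits, and a hypercall from CPL > 0
 * fails with -KVM_EPERM.
 */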
5633int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5634{
5635 unsigned long nr, a0, a1, a2, a3, ret;
5636 int r = 1;
5637
5638 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5639 return kvm_hv_hypercall(vcpu);
5640
5641 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5642 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5643 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5644 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5645 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5646
5647 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5648
5649 if (!is_long_mode(vcpu)) {
5650 nr &= 0xFFFFFFFF;
5651 a0 &= 0xFFFFFFFF;
5652 a1 &= 0xFFFFFFFF;
5653 a2 &= 0xFFFFFFFF;
5654 a3 &= 0xFFFFFFFF;
5655 }
5656
5657 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5658 ret = -KVM_EPERM;
5659 goto out;
5660 }
5661
5662 switch (nr) {
5663 case KVM_HC_VAPIC_POLL_IRQ:
5664 ret = 0;
5665 break;
5666 case KVM_HC_KICK_CPU:
5667 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
5668 ret = 0;
5669 break;
5670 default:
5671 ret = -KVM_ENOSYS;
5672 break;
5673 }
5674out:
5675 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5676 ++vcpu->stat.hypercalls;
5677 return r;
5678}
5679EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5680
5681static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5682{
5683 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5684 char instruction[3];
5685 unsigned long rip = kvm_rip_read(vcpu);
5686
5687 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5688
5689 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5690}
5691
5692/*
5693 * Check whether userspace requested an interrupt window and whether the
5694 * interrupt window is open.
5695 *
5696 * There is no need to exit to userspace if an interrupt is already queued.
5697 */
5698static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5699{
5700 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5701 vcpu->run->request_interrupt_window &&
5702 kvm_arch_interrupt_allowed(vcpu));
5703}
5704
5705static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5706{
5707 struct kvm_run *kvm_run = vcpu->run;
5708
5709 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5710 kvm_run->cr8 = kvm_get_cr8(vcpu);
5711 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5712 if (irqchip_in_kernel(vcpu->kvm))
5713 kvm_run->ready_for_interrupt_injection = 1;
5714 else
5715 kvm_run->ready_for_interrupt_injection =
5716 kvm_arch_interrupt_allowed(vcpu) &&
5717 !kvm_cpu_has_interrupt(vcpu) &&
5718 !kvm_event_needs_reinjection(vcpu);
5719}
5720
5721static int vapic_enter(struct kvm_vcpu *vcpu)
5722{
5723 struct kvm_lapic *apic = vcpu->arch.apic;
5724 struct page *page;
5725
5726 if (!apic || !apic->vapic_addr)
5727 return 0;
5728
5729 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5730 if (is_error_page(page))
5731 return -EFAULT;
5732
5733 vcpu->arch.apic->vapic_page = page;
5734 return 0;
5735}
5736
5737static void vapic_exit(struct kvm_vcpu *vcpu)
5738{
5739 struct kvm_lapic *apic = vcpu->arch.apic;
5740 int idx;
5741
5742 if (!apic || !apic->vapic_addr)
5743 return;
5744
5745 idx = srcu_read_lock(&vcpu->kvm->srcu);
5746 kvm_release_page_dirty(apic->vapic_page);
5747 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5748 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5749}
5750
5751static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5752{
5753 int max_irr, tpr;
5754
5755 if (!kvm_x86_ops->update_cr8_intercept)
5756 return;
5757
5758 if (!vcpu->arch.apic)
5759 return;
5760
5761 if (!vcpu->arch.apic->vapic_addr)
5762 max_irr = kvm_lapic_find_highest_irr(vcpu);
5763 else
5764 max_irr = -1;
5765
5766 if (max_irr != -1)
5767 max_irr >>= 4;
5768
5769 tpr = kvm_lapic_get_cr8(vcpu);
5770
5771 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5772}
5773
5774static void inject_pending_event(struct kvm_vcpu *vcpu)
5775{
5776	/* try to reinject previously injected events first */
5777 if (vcpu->arch.exception.pending) {
5778 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5779 vcpu->arch.exception.has_error_code,
5780 vcpu->arch.exception.error_code);
5781 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5782 vcpu->arch.exception.has_error_code,
5783 vcpu->arch.exception.error_code,
5784 vcpu->arch.exception.reinject);
5785 return;
5786 }
5787
5788 if (vcpu->arch.nmi_injected) {
5789 kvm_x86_ops->set_nmi(vcpu);
5790 return;
5791 }
5792
5793 if (vcpu->arch.interrupt.pending) {
5794 kvm_x86_ops->set_irq(vcpu);
5795 return;
5796 }
5797
5798	/* try to inject a new event if one is pending */
5799 if (vcpu->arch.nmi_pending) {
5800 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5801 --vcpu->arch.nmi_pending;
5802 vcpu->arch.nmi_injected = true;
5803 kvm_x86_ops->set_nmi(vcpu);
5804 }
5805 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5806 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5807 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5808 false);
5809 kvm_x86_ops->set_irq(vcpu);
5810 }
5811 }
5812}
5813
5814static void process_nmi(struct kvm_vcpu *vcpu)
5815{
5816 unsigned limit = 2;
5817
5818	/*
5819	 * x86 is limited to one NMI running and one NMI pending after it.
5820	 * If two NMIs arrive simultaneously, one of them is lost; with an
5821	 * NMI already injected or masked, at most one more may be queued.
5822	 */
5823 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
5824 limit = 1;
5825
5826 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
5827 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
5828 kvm_make_request(KVM_REQ_EVENT, vcpu);
5829}
5830
5831static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5832{
5833 u64 eoi_exit_bitmap[4];
5834 u32 tmr[8];
5835
5836 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
5837 return;
5838
5839 memset(eoi_exit_bitmap, 0, 32);
5840 memset(tmr, 0, 32);
5841
5842 kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
5843 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5844 kvm_apic_update_tmr(vcpu, tmr);
5845}
5846
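/*
 * Descriptive note: vcpu_enter_guest() runs a single guest-entry iteration:
 * it processes pending vcpu requests, injects events, enters the guest and
 * hands the resulting exit to the vendor module's handle_exit().  It returns
 * 1 to continue the __vcpu_run() loop in the kernel, 0 to exit to userspace
 * and a negative value on error.
 */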
5847static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5848{
5849 int r;
5850 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5851 vcpu->run->request_interrupt_window;
5852 bool req_immediate_exit = false;
5853
5854 if (vcpu->requests) {
5855 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5856 kvm_mmu_unload(vcpu);
5857 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5858 __kvm_migrate_timers(vcpu);
5859 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
5860 kvm_gen_update_masterclock(vcpu->kvm);
5861 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
5862 kvm_gen_kvmclock_update(vcpu);
5863 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5864 r = kvm_guest_time_update(vcpu);
5865 if (unlikely(r))
5866 goto out;
5867 }
5868 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5869 kvm_mmu_sync_roots(vcpu);
5870 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5871 kvm_x86_ops->tlb_flush(vcpu);
5872 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5873 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5874 r = 0;
5875 goto out;
5876 }
5877 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5878 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5879 r = 0;
5880 goto out;
5881 }
5882 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5883 vcpu->fpu_active = 0;
5884 kvm_x86_ops->fpu_deactivate(vcpu);
5885 }
5886 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
5887			/* Page is swapped out.  Do a synthetic halt. */
5888 vcpu->arch.apf.halted = true;
5889 r = 1;
5890 goto out;
5891 }
5892 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
5893 record_steal_time(vcpu);
5894 if (kvm_check_request(KVM_REQ_NMI, vcpu))
5895 process_nmi(vcpu);
5896 if (kvm_check_request(KVM_REQ_PMU, vcpu))
5897 kvm_handle_pmu_event(vcpu);
5898 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5899 kvm_deliver_pmi(vcpu);
5900 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
5901 vcpu_scan_ioapic(vcpu);
5902 }
5903
5904 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5905 kvm_apic_accept_events(vcpu);
5906 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
5907 r = 1;
5908 goto out;
5909 }
5910
5911 inject_pending_event(vcpu);
5912
5913		/* Enable the NMI/IRQ window-open exit if needed. */
5914 if (vcpu->arch.nmi_pending)
5915 req_immediate_exit =
5916 kvm_x86_ops->enable_nmi_window(vcpu) != 0;
5917 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5918 req_immediate_exit =
5919 kvm_x86_ops->enable_irq_window(vcpu) != 0;
5920
5921 if (kvm_lapic_enabled(vcpu)) {
5922			/*
5923			 * Update architecture-specific hints for APIC virtual
5924			 * interrupt delivery before entering the guest.
5925			 */
5926 if (kvm_x86_ops->hwapic_irr_update)
5927 kvm_x86_ops->hwapic_irr_update(vcpu,
5928 kvm_lapic_find_highest_irr(vcpu));
5929 update_cr8_intercept(vcpu);
5930 kvm_lapic_sync_to_vapic(vcpu);
5931 }
5932 }
5933
5934 r = kvm_mmu_reload(vcpu);
5935 if (unlikely(r)) {
5936 goto cancel_injection;
5937 }
5938
5939 preempt_disable();
5940
5941 kvm_x86_ops->prepare_guest_switch(vcpu);
5942 if (vcpu->fpu_active)
5943 kvm_load_guest_fpu(vcpu);
5944 kvm_load_guest_xcr0(vcpu);
5945
5946 vcpu->mode = IN_GUEST_MODE;
5947
5948	/* ->mode must be set before ->requests is checked below; see the
5949	 * barrier pairing in make_all_cpus_request().
5950	 */
5951 smp_mb();
5952
5953 local_irq_disable();
5954
5955 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
5956 || need_resched() || signal_pending(current)) {
5957 vcpu->mode = OUTSIDE_GUEST_MODE;
5958 smp_wmb();
5959 local_irq_enable();
5960 preempt_enable();
5961 r = 1;
5962 goto cancel_injection;
5963 }
5964
5965 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5966
5967 if (req_immediate_exit)
5968 smp_send_reschedule(vcpu->cpu);
5969
5970 kvm_guest_enter();
5971
5972 if (unlikely(vcpu->arch.switch_db_regs)) {
5973 set_debugreg(0, 7);
5974 set_debugreg(vcpu->arch.eff_db[0], 0);
5975 set_debugreg(vcpu->arch.eff_db[1], 1);
5976 set_debugreg(vcpu->arch.eff_db[2], 2);
5977 set_debugreg(vcpu->arch.eff_db[3], 3);
5978 }
5979
5980 trace_kvm_entry(vcpu->vcpu_id);
5981 kvm_x86_ops->run(vcpu);
5982
5983	/*
5984	 * If the guest has used debug registers, at least dr7 will have been
5985	 * disabled while returning to the host.  If the host has no active
5986	 * hardware breakpoints the clobbered debug registers do not matter,
5987	 * but if some are active, restore the previous host state.
5988	 */
5990 if (hw_breakpoint_active())
5991 hw_breakpoint_restore();
5992
5993 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
5994 native_read_tsc());
5995
5996 vcpu->mode = OUTSIDE_GUEST_MODE;
5997 smp_wmb();
5998
5999	/* Interrupts are re-enabled by handle_external_intr(). */
6000 kvm_x86_ops->handle_external_intr(vcpu);
6001
6002 ++vcpu->stat.exits;
6003
6004	/*
6005	 * An instruction is needed between local_irq_enable() (inside
6006	 * handle_external_intr()) and kvm_guest_exit() so the timer interrupt
6007	 * is not delayed by the interrupt shadow.  The stat.exits increment
6008	 * does nicely, but reordering must be prevented, hence this barrier():
6009	 */
6010 barrier();
6011
6012 kvm_guest_exit();
6013
6014 preempt_enable();
6015
6016 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6017
6018
6019	/* Profile KVM exit RIPs: */
6021 if (unlikely(prof_on == KVM_PROFILING)) {
6022 unsigned long rip = kvm_rip_read(vcpu);
6023 profile_hit(KVM_PROFILING, (void *)rip);
6024 }
6025
6026 if (unlikely(vcpu->arch.tsc_always_catchup))
6027 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6028
6029 if (vcpu->arch.apic_attention)
6030 kvm_lapic_sync_from_vapic(vcpu);
6031
6032 r = kvm_x86_ops->handle_exit(vcpu);
6033 return r;
6034
6035cancel_injection:
6036 kvm_x86_ops->cancel_injection(vcpu);
6037 if (unlikely(vcpu->arch.apic_attention))
6038 kvm_lapic_sync_from_vapic(vcpu);
6039out:
6040 return r;
6041}
6042
6043
6044static int __vcpu_run(struct kvm_vcpu *vcpu)
6045{
6046 int r;
6047 struct kvm *kvm = vcpu->kvm;
6048
6049 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6050 r = vapic_enter(vcpu);
6051 if (r) {
6052 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6053 return r;
6054 }
6055
6056 r = 1;
6057 while (r > 0) {
6058 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6059 !vcpu->arch.apf.halted)
6060 r = vcpu_enter_guest(vcpu);
6061 else {
6062 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6063 kvm_vcpu_block(vcpu);
6064 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6065 if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
6066 kvm_apic_accept_events(vcpu);
6067 switch(vcpu->arch.mp_state) {
6068 case KVM_MP_STATE_HALTED:
6069 vcpu->arch.pv.pv_unhalted = false;
6070 vcpu->arch.mp_state =
6071 KVM_MP_STATE_RUNNABLE;
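					/* fall through: an unhalted vcpu is runnable */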
6072 case KVM_MP_STATE_RUNNABLE:
6073 vcpu->arch.apf.halted = false;
6074 break;
6075 case KVM_MP_STATE_INIT_RECEIVED:
6076 break;
6077 default:
6078 r = -EINTR;
6079 break;
6080 }
6081 }
6082 }
6083
6084 if (r <= 0)
6085 break;
6086
6087 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
6088 if (kvm_cpu_has_pending_timer(vcpu))
6089 kvm_inject_pending_timer_irqs(vcpu);
6090
6091 if (dm_request_for_irq_injection(vcpu)) {
6092 r = -EINTR;
6093 vcpu->run->exit_reason = KVM_EXIT_INTR;
6094 ++vcpu->stat.request_irq_exits;
6095 }
6096
6097 kvm_check_async_pf_completion(vcpu);
6098
6099 if (signal_pending(current)) {
6100 r = -EINTR;
6101 vcpu->run->exit_reason = KVM_EXIT_INTR;
6102 ++vcpu->stat.signal_exits;
6103 }
6104 if (need_resched()) {
6105 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6106 kvm_resched(vcpu);
6107 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6108 }
6109 }
6110
6111 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6112
6113 vapic_exit(vcpu);
6114
6115 return r;
6116}
6117
6118static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
6119{
6120 int r;
6121 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6122 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
6123 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6124 if (r != EMULATE_DONE)
6125 return 0;
6126 return 1;
6127}
6128
6129static int complete_emulated_pio(struct kvm_vcpu *vcpu)
6130{
6131 BUG_ON(!vcpu->arch.pio.count);
6132
6133 return complete_emulated_io(vcpu);
6134}
6135
6136/*
6137 * Implements the following, as a state machine:
6138 *
6139 * read:
6140 *   for each fragment
6141 *     for each mmio piece in the fragment
6142 *       write gpa, len
6143 *       exit
6144 *       copy data
6145 *   execute insn
6146 *
6147 * write:
6148 *   for each fragment
6149 *     for each mmio piece in the fragment
6150 *       write gpa, len
6151 *       copy data
6152 *       exit
6153 */
6154static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6155{
6156 struct kvm_run *run = vcpu->run;
6157 struct kvm_mmio_fragment *frag;
6158 unsigned len;
6159
6160 BUG_ON(!vcpu->mmio_needed);
6161
6162	/* Complete the previous fragment first. */
6163 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
6164 len = min(8u, frag->len);
6165 if (!vcpu->mmio_is_write)
6166 memcpy(frag->data, run->mmio.data, len);
6167
6168 if (frag->len <= 8) {
6169		/* Switch to the next fragment. */
6170 frag++;
6171 vcpu->mmio_cur_fragment++;
6172 } else {
6173		/* Go forward to the next mmio piece. */
6174 frag->data += len;
6175 frag->gpa += len;
6176 frag->len -= len;
6177 }
6178
6179 if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
6180 vcpu->mmio_needed = 0;
6181
6182
6183 if (vcpu->mmio_is_write)
6184 return 1;
6185 vcpu->mmio_read_completed = 1;
6186 return complete_emulated_io(vcpu);
6187 }
6188
6189 run->exit_reason = KVM_EXIT_MMIO;
6190 run->mmio.phys_addr = frag->gpa;
6191 if (vcpu->mmio_is_write)
6192 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
6193 run->mmio.len = min(8u, frag->len);
6194 run->mmio.is_write = vcpu->mmio_is_write;
6195 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6196 return 0;
6197}
6198
6199
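/*
 * Descriptive note: kvm_arch_vcpu_ioctl_run() is the top-level handler for
 * the KVM_RUN ioctl.  It makes sure FPU state exists for the task, applies
 * the guest's signal mask, finishes any interrupted MMIO/PIO emulation via
 * complete_userspace_io, and then enters the __vcpu_run() loop.
 */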
6200int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
6201{
6202 int r;
6203 sigset_t sigsaved;
6204
6205 if (!tsk_used_math(current) && init_fpu(current))
6206 return -ENOMEM;
6207
6208 if (vcpu->sigset_active)
6209 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
6210
6211 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
6212 kvm_vcpu_block(vcpu);
6213 kvm_apic_accept_events(vcpu);
6214 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
6215 r = -EAGAIN;
6216 goto out;
6217 }
6218
6219	/* re-sync the apic's tpr from the run structure */
6220 if (!irqchip_in_kernel(vcpu->kvm)) {
6221 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
6222 r = -EINVAL;
6223 goto out;
6224 }
6225 }
6226
6227 if (unlikely(vcpu->arch.complete_userspace_io)) {
6228 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
6229 vcpu->arch.complete_userspace_io = NULL;
6230 r = cui(vcpu);
6231 if (r <= 0)
6232 goto out;
6233 } else
6234 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
6235
6236 r = __vcpu_run(vcpu);
6237
6238out:
6239 post_kvm_run_save(vcpu);
6240 if (vcpu->sigset_active)
6241 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
6242
6243 return r;
6244}
6245
6246int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6247{
6248 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
6249		/*
6250		 * We get here if userspace calls get_regs() in the middle of
6251		 * instruction emulation; the register state then needs to be
6252		 * copied back from the emulation context to the vcpu.  Userspace
6253		 * should not normally do this, but some badly designed PV
6254		 * devices (the vmware backdoor interface) need it to work.
6255		 */
6256 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
6257 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6258 }
6259 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
6260 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
6261 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
6262 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
6263 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
6264 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
6265 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
6266 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
6267#ifdef CONFIG_X86_64
6268 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
6269 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
6270 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
6271 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
6272 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
6273 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
6274 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
6275 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
6276#endif
6277
6278 regs->rip = kvm_rip_read(vcpu);
6279 regs->rflags = kvm_get_rflags(vcpu);
6280
6281 return 0;
6282}
6283
6284int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6285{
6286 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6287 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6288
6289 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
6290 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
6291 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
6292 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
6293 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
6294 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
6295 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
6296 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
6297#ifdef CONFIG_X86_64
6298 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
6299 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
6300 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
6301 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
6302 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
6303 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
6304 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
6305 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
6306#endif
6307
6308 kvm_rip_write(vcpu, regs->rip);
6309 kvm_set_rflags(vcpu, regs->rflags);
6310
6311 vcpu->arch.exception.pending = false;
6312
6313 kvm_make_request(KVM_REQ_EVENT, vcpu);
6314
6315 return 0;
6316}
6317
6318void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
6319{
6320 struct kvm_segment cs;
6321
6322 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6323 *db = cs.db;
6324 *l = cs.l;
6325}
6326EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
6327
6328int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
6329 struct kvm_sregs *sregs)
6330{
6331 struct desc_ptr dt;
6332
6333 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6334 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6335 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6336 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6337 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6338 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6339
6340 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6341 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6342
6343 kvm_x86_ops->get_idt(vcpu, &dt);
6344 sregs->idt.limit = dt.size;
6345 sregs->idt.base = dt.address;
6346 kvm_x86_ops->get_gdt(vcpu, &dt);
6347 sregs->gdt.limit = dt.size;
6348 sregs->gdt.base = dt.address;
6349
6350 sregs->cr0 = kvm_read_cr0(vcpu);
6351 sregs->cr2 = vcpu->arch.cr2;
6352 sregs->cr3 = kvm_read_cr3(vcpu);
6353 sregs->cr4 = kvm_read_cr4(vcpu);
6354 sregs->cr8 = kvm_get_cr8(vcpu);
6355 sregs->efer = vcpu->arch.efer;
6356 sregs->apic_base = kvm_get_apic_base(vcpu);
6357
6358 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
6359
6360 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
6361 set_bit(vcpu->arch.interrupt.nr,
6362 (unsigned long *)sregs->interrupt_bitmap);
6363
6364 return 0;
6365}
6366
6367int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
6368 struct kvm_mp_state *mp_state)
6369{
6370 kvm_apic_accept_events(vcpu);
6371 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
6372 vcpu->arch.pv.pv_unhalted)
6373 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
6374 else
6375 mp_state->mp_state = vcpu->arch.mp_state;
6376
6377 return 0;
6378}
6379
6380int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
6381 struct kvm_mp_state *mp_state)
6382{
6383 if (!kvm_vcpu_has_lapic(vcpu) &&
6384 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
6385 return -EINVAL;
6386
6387 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
6388 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
6389 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
6390 } else
6391 vcpu->arch.mp_state = mp_state->mp_state;
6392 kvm_make_request(KVM_REQ_EVENT, vcpu);
6393 return 0;
6394}
6395
6396int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
6397 int reason, bool has_error_code, u32 error_code)
6398{
6399 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6400 int ret;
6401
6402 init_emulate_ctxt(vcpu);
6403
6404 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
6405 has_error_code, error_code);
6406
6407 if (ret)
6408 return EMULATE_FAIL;
6409
6410 kvm_rip_write(vcpu, ctxt->eip);
6411 kvm_set_rflags(vcpu, ctxt->eflags);
6412 kvm_make_request(KVM_REQ_EVENT, vcpu);
6413 return EMULATE_DONE;
6414}
6415EXPORT_SYMBOL_GPL(kvm_task_switch);
6416
6417int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6418 struct kvm_sregs *sregs)
6419{
6420 int mmu_reset_needed = 0;
6421 int pending_vec, max_bits, idx;
6422 struct desc_ptr dt;
6423
6424 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
6425 return -EINVAL;
6426
6427 dt.size = sregs->idt.limit;
6428 dt.address = sregs->idt.base;
6429 kvm_x86_ops->set_idt(vcpu, &dt);
6430 dt.size = sregs->gdt.limit;
6431 dt.address = sregs->gdt.base;
6432 kvm_x86_ops->set_gdt(vcpu, &dt);
6433
6434 vcpu->arch.cr2 = sregs->cr2;
6435 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
6436 vcpu->arch.cr3 = sregs->cr3;
6437 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
6438
6439 kvm_set_cr8(vcpu, sregs->cr8);
6440
6441 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6442 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6443 kvm_set_apic_base(vcpu, sregs->apic_base);
6444
6445 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6446 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
6447 vcpu->arch.cr0 = sregs->cr0;
6448
6449 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
6450 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
6451 if (sregs->cr4 & X86_CR4_OSXSAVE)
6452 kvm_update_cpuid(vcpu);
6453
6454 idx = srcu_read_lock(&vcpu->kvm->srcu);
6455 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
6456 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6457 mmu_reset_needed = 1;
6458 }
6459 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6460
6461 if (mmu_reset_needed)
6462 kvm_mmu_reset_context(vcpu);
6463
6464 max_bits = KVM_NR_INTERRUPTS;
6465 pending_vec = find_first_bit(
6466 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
6467 if (pending_vec < max_bits) {
6468 kvm_queue_interrupt(vcpu, pending_vec, false);
6469 pr_debug("Set back pending irq %d\n", pending_vec);
6470 }
6471
6472 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6473 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6474 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6475 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6476 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6477 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6478
6479 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6480 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6481
6482 update_cr8_intercept(vcpu);
6483
6484	/* Older userspace won't unhalt the vcpu on reset. */
6485 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
6486 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
6487 !is_protmode(vcpu))
6488 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6489
6490 kvm_make_request(KVM_REQ_EVENT, vcpu);
6491
6492 return 0;
6493}
6494
6495int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
6496 struct kvm_guest_debug *dbg)
6497{
6498 unsigned long rflags;
6499 int i, r;
6500
6501 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
6502 r = -EBUSY;
6503 if (vcpu->arch.exception.pending)
6504 goto out;
6505 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
6506 kvm_queue_exception(vcpu, DB_VECTOR);
6507 else
6508 kvm_queue_exception(vcpu, BP_VECTOR);
6509 }
6510
6511	/*
6512	 * Read rflags as long as potentially injected trace flags are still
6513	 * filtered out.
6514	 */
6515 rflags = kvm_get_rflags(vcpu);
6516
6517 vcpu->guest_debug = dbg->control;
6518 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
6519 vcpu->guest_debug = 0;
6520
6521 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6522 for (i = 0; i < KVM_NR_DB_REGS; ++i)
6523 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
6524 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
6525 } else {
6526 for (i = 0; i < KVM_NR_DB_REGS; i++)
6527 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6528 }
6529 kvm_update_dr7(vcpu);
6530
6531 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6532 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
6533 get_segment_base(vcpu, VCPU_SREG_CS);
6534
6535	/*
6536	 * Trigger an rflags update that will inject or remove the trace
6537	 * flags.
6538	 */
6539 kvm_set_rflags(vcpu, rflags);
6540
6541 kvm_x86_ops->update_db_bp_intercept(vcpu);
6542
6543 r = 0;
6544
6545out:
6546
6547 return r;
6548}
6549
6550/*
6551 * Translate a guest virtual address to a guest physical address.
6552 */
6553int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6554 struct kvm_translation *tr)
6555{
6556 unsigned long vaddr = tr->linear_address;
6557 gpa_t gpa;
6558 int idx;
6559
6560 idx = srcu_read_lock(&vcpu->kvm->srcu);
6561 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
6562 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6563 tr->physical_address = gpa;
6564 tr->valid = gpa != UNMAPPED_GVA;
6565 tr->writeable = 1;
6566 tr->usermode = 0;
6567
6568 return 0;
6569}
6570
6571int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6572{
6573 struct i387_fxsave_struct *fxsave =
6574 &vcpu->arch.guest_fpu.state->fxsave;
6575
6576 memcpy(fpu->fpr, fxsave->st_space, 128);
6577 fpu->fcw = fxsave->cwd;
6578 fpu->fsw = fxsave->swd;
6579 fpu->ftwx = fxsave->twd;
6580 fpu->last_opcode = fxsave->fop;
6581 fpu->last_ip = fxsave->rip;
6582 fpu->last_dp = fxsave->rdp;
6583 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
6584
6585 return 0;
6586}
6587
6588int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6589{
6590 struct i387_fxsave_struct *fxsave =
6591 &vcpu->arch.guest_fpu.state->fxsave;
6592
6593 memcpy(fxsave->st_space, fpu->fpr, 128);
6594 fxsave->cwd = fpu->fcw;
6595 fxsave->swd = fpu->fsw;
6596 fxsave->twd = fpu->ftwx;
6597 fxsave->fop = fpu->last_opcode;
6598 fxsave->rip = fpu->last_ip;
6599 fxsave->rdp = fpu->last_dp;
6600 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
6601
6602 return 0;
6603}
6604
6605int fx_init(struct kvm_vcpu *vcpu)
6606{
6607 int err;
6608
6609 err = fpu_alloc(&vcpu->arch.guest_fpu);
6610 if (err)
6611 return err;
6612
6613 fpu_finit(&vcpu->arch.guest_fpu);
6614
6615	/*
6616	 * Ensure guest xcr0 starts out valid for loading: x87 state only.
6617	 */
6618 vcpu->arch.xcr0 = XSTATE_FP;
6619
6620 vcpu->arch.cr0 |= X86_CR0_ET;
6621
6622 return 0;
6623}
6624EXPORT_SYMBOL_GPL(fx_init);
6625
6626static void fx_free(struct kvm_vcpu *vcpu)
6627{
6628 fpu_free(&vcpu->arch.guest_fpu);
6629}
6630
6631void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6632{
6633 if (vcpu->guest_fpu_loaded)
6634 return;
6635
6636	/*
6637	 * Restore all possible guest FPU states and assume the host may use
6638	 * all available feature bits; the guest xcr0 itself is loaded later,
6639	 * at guest entry.
6640	 */
6641 kvm_put_guest_xcr0(vcpu);
6642 vcpu->guest_fpu_loaded = 1;
6643 __kernel_fpu_begin();
6644 fpu_restore_checking(&vcpu->arch.guest_fpu);
6645 trace_kvm_fpu(1);
6646}
6647
6648void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6649{
6650 kvm_put_guest_xcr0(vcpu);
6651
6652 if (!vcpu->guest_fpu_loaded)
6653 return;
6654
6655 vcpu->guest_fpu_loaded = 0;
6656 fpu_save_init(&vcpu->arch.guest_fpu);
6657 __kernel_fpu_end();
6658 ++vcpu->stat.fpu_reload;
6659 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6660 trace_kvm_fpu(0);
6661}
6662
6663void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6664{
6665 kvmclock_reset(vcpu);
6666
6667 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6668 fx_free(vcpu);
6669 kvm_x86_ops->vcpu_free(vcpu);
6670}
6671
6672struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6673 unsigned int id)
6674{
6675 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6676 printk_once(KERN_WARNING
6677 "kvm: SMP vm created on host with unstable TSC; "
6678 "guest TSC will not be reliable\n");
6679 return kvm_x86_ops->vcpu_create(kvm, id);
6680}
6681
6682int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6683{
6684 int r;
6685
6686 vcpu->arch.mtrr_state.have_fixed = 1;
6687 r = vcpu_load(vcpu);
6688 if (r)
6689 return r;
6690 kvm_vcpu_reset(vcpu);
6691 r = kvm_mmu_setup(vcpu);
6692 vcpu_put(vcpu);
6693
6694 return r;
6695}
6696
6697int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6698{
6699 int r;
6700 struct msr_data msr;
6701
6702 r = vcpu_load(vcpu);
6703 if (r)
6704 return r;
6705 msr.data = 0x0;
6706 msr.index = MSR_IA32_TSC;
6707 msr.host_initiated = true;
6708 kvm_write_tsc(vcpu, &msr);
6709 vcpu_put(vcpu);
6710
6711 return r;
6712}
6713
6714void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6715{
6716 int r;
6717 vcpu->arch.apf.msr_val = 0;
6718
6719 r = vcpu_load(vcpu);
6720 BUG_ON(r);
6721 kvm_mmu_unload(vcpu);
6722 vcpu_put(vcpu);
6723
6724 fx_free(vcpu);
6725 kvm_x86_ops->vcpu_free(vcpu);
6726}
6727
6728void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6729{
6730 atomic_set(&vcpu->arch.nmi_queued, 0);
6731 vcpu->arch.nmi_pending = 0;
6732 vcpu->arch.nmi_injected = false;
6733
6734 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6735 vcpu->arch.dr6 = DR6_FIXED_1;
6736 vcpu->arch.dr7 = DR7_FIXED_1;
6737 kvm_update_dr7(vcpu);
6738
6739 kvm_make_request(KVM_REQ_EVENT, vcpu);
6740 vcpu->arch.apf.msr_val = 0;
6741 vcpu->arch.st.msr_val = 0;
6742
6743 kvmclock_reset(vcpu);
6744
6745 kvm_clear_async_pf_completion_queue(vcpu);
6746 kvm_async_pf_hash_reset(vcpu);
6747 vcpu->arch.apf.halted = false;
6748
6749 kvm_pmu_reset(vcpu);
6750
6751 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
6752 vcpu->arch.regs_avail = ~0;
6753 vcpu->arch.regs_dirty = ~0;
6754
6755 kvm_x86_ops->vcpu_reset(vcpu);
6756}
6757
6758void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
6759{
6760 struct kvm_segment cs;
6761
6762 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6763 cs.selector = vector << 8;
6764 cs.base = vector << 12;
6765 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
6766 kvm_rip_write(vcpu, 0);
6767}
6768
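/*
 * Descriptive note: kvm_arch_hardware_enable() is called on every CPU when
 * hardware virtualization is (re-)enabled, e.g. at module load or on resume
 * from suspend.  Besides enabling the vendor module, it detects a host TSC
 * that went backwards across the suspend and compensates the vcpu TSC
 * offsets (see the comment further down).
 */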
6769int kvm_arch_hardware_enable(void *garbage)
6770{
6771 struct kvm *kvm;
6772 struct kvm_vcpu *vcpu;
6773 int i;
6774 int ret;
6775 u64 local_tsc;
6776 u64 max_tsc = 0;
6777 bool stable, backwards_tsc = false;
6778
6779 kvm_shared_msr_cpu_online();
6780 ret = kvm_x86_ops->hardware_enable(garbage);
6781 if (ret != 0)
6782 return ret;
6783
6784 local_tsc = native_read_tsc();
6785 stable = !check_tsc_unstable();
6786 list_for_each_entry(kvm, &vm_list, vm_list) {
6787 kvm_for_each_vcpu(i, vcpu, kvm) {
6788 if (!stable && vcpu->cpu == smp_processor_id())
6789 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6790 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6791 backwards_tsc = true;
6792 if (vcpu->arch.last_host_tsc > max_tsc)
6793 max_tsc = vcpu->arch.last_host_tsc;
6794 }
6795 }
6796 }
6797
 /*
  * Even on hosts whose TSC is otherwise stable, the TSC itself is reset
  * across a suspend/resume cycle, so the last_host_tsc recorded for a
  * vcpu can be larger than the TSC just read.  The loop above notes this
  * as backwards_tsc and remembers the largest stale value in max_tsc;
  * below, every vcpu's TSC offset is adjusted by the observed delta and
  * a masterclock update is requested, so guests never see their TSC jump
  * backwards.
  */
6836 if (backwards_tsc) {
6837 u64 delta_cyc = max_tsc - local_tsc;
6838 list_for_each_entry(kvm, &vm_list, vm_list) {
6839 kvm_for_each_vcpu(i, vcpu, kvm) {
6840 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6841 vcpu->arch.last_host_tsc = local_tsc;
6842 set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
6843 &vcpu->requests);
6844 }
6845
   /*
    * Also clear the per-VM TSC write tracking state so that the
    * next guest TSC write is not matched against values recorded
    * before the jump.
    */
6852 kvm->arch.last_tsc_nsec = 0;
6853 kvm->arch.last_tsc_write = 0;
6854 }
6855
6856 }
6857 return 0;
6858}
6859
6860void kvm_arch_hardware_disable(void *garbage)
6861{
6862 kvm_x86_ops->hardware_disable(garbage);
6863 drop_user_return_notifiers(garbage);
6864}
6865
6866int kvm_arch_hardware_setup(void)
6867{
6868 return kvm_x86_ops->hardware_setup();
6869}
6870
6871void kvm_arch_hardware_unsetup(void)
6872{
6873 kvm_x86_ops->hardware_unsetup();
6874}
6875
6876void kvm_arch_check_processor_compat(void *rtn)
6877{
6878 kvm_x86_ops->check_processor_compatibility(rtn);
6879}
6880
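/*
 * A vcpu fits this VM only if its local APIC model matches the VM's
 * irqchip setting: both in-kernel, or both emulated in userspace.
 */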
6881bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6882{
6883 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6884}
6885
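/*
 * Counts vcpus created without an in-kernel local APIC (see
 * kvm_arch_vcpu_init/uninit below), so the APIC code can use a static
 * branch for the common "every vcpu has an in-kernel APIC" case.
 */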
6886struct static_key kvm_no_apic_vcpu __read_mostly;
6887
6888int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6889{
6890 struct page *page;
6891 struct kvm *kvm;
6892 int r;
6893
6894 BUG_ON(vcpu->kvm == NULL);
6895 kvm = vcpu->kvm;
6896
6897 vcpu->arch.pv.pv_unhalted = false;
6898 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
6899 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
6900 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6901 else
6902 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
6903
6904 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6905 if (!page) {
6906 r = -ENOMEM;
6907 goto fail;
6908 }
6909 vcpu->arch.pio_data = page_address(page);
6910
6911 kvm_set_tsc_khz(vcpu, max_tsc_khz);
6912
6913 r = kvm_mmu_create(vcpu);
6914 if (r < 0)
6915 goto fail_free_pio_data;
6916
6917 if (irqchip_in_kernel(kvm)) {
6918 r = kvm_create_lapic(vcpu);
6919 if (r < 0)
6920 goto fail_mmu_destroy;
6921 } else
6922 static_key_slow_inc(&kvm_no_apic_vcpu);
6923
6924 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6925 GFP_KERNEL);
6926 if (!vcpu->arch.mce_banks) {
6927 r = -ENOMEM;
6928 goto fail_free_lapic;
6929 }
6930 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
6931
6932 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
6933 r = -ENOMEM;
6934 goto fail_free_mce_banks;
6935 }
6936
6937 r = fx_init(vcpu);
6938 if (r)
6939 goto fail_free_wbinvd_dirty_mask;
6940
6941 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6942 vcpu->arch.pv_time_enabled = false;
6943 kvm_async_pf_hash_reset(vcpu);
6944 kvm_pmu_init(vcpu);
6945
6946 return 0;
6947fail_free_wbinvd_dirty_mask:
6948 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6949fail_free_mce_banks:
6950 kfree(vcpu->arch.mce_banks);
6951fail_free_lapic:
6952 kvm_free_lapic(vcpu);
6953fail_mmu_destroy:
6954 kvm_mmu_destroy(vcpu);
6955fail_free_pio_data:
6956 free_page((unsigned long)vcpu->arch.pio_data);
6957fail:
6958 return r;
6959}
6960
6961void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
6962{
6963 int idx;
6964
6965 kvm_pmu_destroy(vcpu);
6966 kfree(vcpu->arch.mce_banks);
6967 kvm_free_lapic(vcpu);
6968 idx = srcu_read_lock(&vcpu->kvm->srcu);
6969 kvm_mmu_destroy(vcpu);
6970 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6971 free_page((unsigned long)vcpu->arch.pio_data);
6972 if (!irqchip_in_kernel(vcpu->kvm))
6973 static_key_slow_dec(&kvm_no_apic_vcpu);
6974}
6975
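/*
 * Initialize the x86-specific per-VM state: MMU page lists, reserved irq
 * source ids, and the TSC/pvclock bookkeeping, including the initial
 * masterclock copy.  Only VM type 0 is supported.
 */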
6976int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6977{
6978 if (type)
6979 return -EINVAL;
6980
6981 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6982 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
6983 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
6984
 /* Reserve the irq source id used for userspace-injected interrupts. */
6986 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 /* Likewise reserve the id used by the irqfd resample mechanism. */
6988 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
6989 &kvm->arch.irq_sources_bitmap);
6990
6991 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
6992 mutex_init(&kvm->arch.apic_map_lock);
6993 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
6994
6995 pvclock_update_vm_gtod_copy(kvm);
6996
6997 return 0;
6998}
6999
7000static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
7001{
7002 int r;
7003 r = vcpu_load(vcpu);
7004 BUG_ON(r);
7005 kvm_mmu_unload(vcpu);
7006 vcpu_put(vcpu);
7007}
7008
7009static void kvm_free_vcpus(struct kvm *kvm)
7010{
7011 unsigned int i;
7012 struct kvm_vcpu *vcpu;
7013
 /*
  * Unload each vcpu's MMU and flush its pending async page faults first;
  * the vcpu structures themselves are freed in the second pass.
  */
7017 kvm_for_each_vcpu(i, vcpu, kvm) {
7018 kvm_clear_async_pf_completion_queue(vcpu);
7019 kvm_unload_vcpu_mmu(vcpu);
7020 }
7021 kvm_for_each_vcpu(i, vcpu, kvm)
7022 kvm_arch_vcpu_free(vcpu);
7023
7024 mutex_lock(&kvm->lock);
7025 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
7026 kvm->vcpus[i] = NULL;
7027
7028 atomic_set(&kvm->online_vcpus, 0);
7029 mutex_unlock(&kvm->lock);
7030}
7031
7032void kvm_arch_sync_events(struct kvm *kvm)
7033{
7034 kvm_free_all_assigned_devices(kvm);
7035 kvm_free_pit(kvm);
7036}
7037
7038void kvm_arch_destroy_vm(struct kvm *kvm)
7039{
7040 if (current->mm == kvm->mm) {
  /*
   * Free the internal memory slots that KVM mapped into the process
   * address space (APIC access page, EPT identity page table, TSS)
   * by installing empty regions in their place.  This is only done
   * while still running in the mm that owns the VM.
   */
7046 struct kvm_userspace_memory_region mem;
7047 memset(&mem, 0, sizeof(mem));
7048 mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
7049 kvm_set_memory_region(kvm, &mem);
7050
7051 mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
7052 kvm_set_memory_region(kvm, &mem);
7053
7054 mem.slot = TSS_PRIVATE_MEMSLOT;
7055 kvm_set_memory_region(kvm, &mem);
7056 }
7057 kvm_iommu_unmap_guest(kvm);
7058 kfree(kvm->arch.vpic);
7059 kfree(kvm->arch.vioapic);
7060 kvm_free_vcpus(kvm);
7061 if (kvm->arch.apic_access_page)
7062 put_page(kvm->arch.apic_access_page);
7063 if (kvm->arch.ept_identity_pagetable)
7064 put_page(kvm->arch.ept_identity_pagetable);
7065 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7066}
7067
7068void kvm_arch_free_memslot(struct kvm_memory_slot *free,
7069 struct kvm_memory_slot *dont)
7070{
7071 int i;
7072
7073 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7074 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
7075 kvm_kvfree(free->arch.rmap[i]);
7076 free->arch.rmap[i] = NULL;
7077 }
7078 if (i == 0)
7079 continue;
7080
7081 if (!dont || free->arch.lpage_info[i - 1] !=
7082 dont->arch.lpage_info[i - 1]) {
7083 kvm_kvfree(free->arch.lpage_info[i - 1]);
7084 free->arch.lpage_info[i - 1] = NULL;
7085 }
7086 }
7087}
7088
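/*
 * Allocate the per-level metadata for a new memslot: an rmap array for
 * every supported page size and, for the large-page levels, an lpage_info
 * array.  Head and tail regions that cannot be covered by a large page
 * are pre-marked with write_count = 1, which disables large mappings
 * there.
 */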
7089int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
7090{
7091 int i;
7092
7093 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7094 unsigned long ugfn;
7095 int lpages;
7096 int level = i + 1;
7097
7098 lpages = gfn_to_index(slot->base_gfn + npages - 1,
7099 slot->base_gfn, level) + 1;
7100
7101 slot->arch.rmap[i] =
7102 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
7103 if (!slot->arch.rmap[i])
7104 goto out_free;
7105 if (i == 0)
7106 continue;
7107
7108 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
7109 sizeof(*slot->arch.lpage_info[i - 1]));
7110 if (!slot->arch.lpage_info[i - 1])
7111 goto out_free;
7112
7113 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
7114 slot->arch.lpage_info[i - 1][0].write_count = 1;
7115 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
7116 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
7117 ugfn = slot->userspace_addr >> PAGE_SHIFT;
7118

  /*
   * Do not use large pages at this level if the gfn and the userspace
   * address are misaligned with respect to each other, or if large
   * pages are disabled: mark every entry write-protected instead.
   */
7123 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
7124 !kvm_largepages_enabled()) {
7125 unsigned long j;
7126
7127 for (j = 0; j < lpages; ++j)
7128 slot->arch.lpage_info[i - 1][j].write_count = 1;
7129 }
7130 }
7131
7132 return 0;
7133
7134out_free:
7135 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7136 kvm_kvfree(slot->arch.rmap[i]);
7137 slot->arch.rmap[i] = NULL;
7138 if (i == 0)
7139 continue;
7140
7141 kvm_kvfree(slot->arch.lpage_info[i - 1]);
7142 slot->arch.lpage_info[i - 1] = NULL;
7143 }
7144 return -ENOMEM;
7145}
7146
7147void kvm_arch_memslots_updated(struct kvm *kvm)
7148{
 /*
  * The memslot layout (and thus the memslot generation) has changed, so
  * any MMIO sptes caching the old generation must be invalidated.
  */
7153 kvm_mmu_invalidate_mmio_sptes(kvm);
7154}
7155
7156int kvm_arch_prepare_memory_region(struct kvm *kvm,
7157 struct kvm_memory_slot *memslot,
7158 struct kvm_userspace_memory_region *mem,
7159 enum kvm_mr_change change)
7160{
 /*
  * Only the internal slots (id >= KVM_USER_MEM_SLOTS) need an address
  * space mapping created here; user slots are supplied with a valid
  * userspace_addr by the ioctl caller.
  */
7165 if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
7166 unsigned long userspace_addr;
7167
  /*
   * Back the internal slot with anonymous memory.  MAP_SHARED keeps
   * the pages from being duplicated behind KVM's back by fork()/COW.
   */
7172 userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
7173 PROT_READ | PROT_WRITE,
7174 MAP_SHARED | MAP_ANONYMOUS, 0);
7175
7176 if (IS_ERR((void *)userspace_addr))
7177 return PTR_ERR((void *)userspace_addr);
7178
7179 memslot->userspace_addr = userspace_addr;
7180 }
7181
7182 return 0;
7183}
7184
7185void kvm_arch_commit_memory_region(struct kvm *kvm,
7186 struct kvm_userspace_memory_region *mem,
7187 const struct kvm_memory_slot *old,
7188 enum kvm_mr_change change)
7189{
7190
7191 int nr_mmu_pages = 0;
7192
7193 if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
7194 int ret;
7195
7196 ret = vm_munmap(old->userspace_addr,
7197 old->npages * PAGE_SIZE);
7198 if (ret < 0)
7199 printk(KERN_WARNING
7200 "kvm_vm_ioctl_set_memory_region: "
7201 "failed to munmap memory\n");
7202 }
7203
7204 if (!kvm->arch.n_requested_mmu_pages)
7205 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
7206
7207 if (nr_mmu_pages)
7208 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
7209
 /*
  * If dirty logging is enabled for this slot (and the slot is not being
  * deleted), write-protect its pages so guest writes are tracked from
  * now on.
  */
7214 if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
7215 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
7216}
7217
7218void kvm_arch_flush_shadow_all(struct kvm *kvm)
7219{
7220 kvm_mmu_invalidate_zap_all_pages(kvm);
7221}
7222
7223void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
7224 struct kvm_memory_slot *slot)
7225{
7226 kvm_mmu_invalidate_zap_all_pages(kvm);
7227}
7228
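/*
 * A vcpu is runnable if it is RUNNABLE and not halted on an async page
 * fault, or if it has completed async page faults, pending APIC events,
 * a pv-unhalt request, queued NMIs, or a deliverable external interrupt.
 */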
7229int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
7230{
7231 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7232 !vcpu->arch.apf.halted)
7233 || !list_empty_careful(&vcpu->async_pf.done)
7234 || kvm_apic_has_events(vcpu)
7235 || vcpu->arch.pv.pv_unhalted
7236 || atomic_read(&vcpu->arch.nmi_queued) ||
7237 (kvm_arch_interrupt_allowed(vcpu) &&
7238 kvm_cpu_has_interrupt(vcpu));
7239}
7240
7241int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
7242{
7243 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
7244}
7245
7246int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
7247{
7248 return kvm_x86_ops->interrupt_allowed(vcpu);
7249}
7250
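/*
 * Compare a saved linear RIP (CS base + RIP) with the vcpu's current one;
 * used, for instance, by kvm_set_rflags() below to decide whether the
 * single-step trap flag still applies.
 */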
7251bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
7252{
7253 unsigned long current_rip = kvm_rip_read(vcpu) +
7254 get_segment_base(vcpu, VCPU_SREG_CS);
7255
7256 return current_rip == linear_rip;
7257}
7258EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
7259
7260unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
7261{
7262 unsigned long rflags;
7263
7264 rflags = kvm_x86_ops->get_rflags(vcpu);
7265 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7266 rflags &= ~X86_EFLAGS_TF;
7267 return rflags;
7268}
7269EXPORT_SYMBOL_GPL(kvm_get_rflags);
7270
7271void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7272{
7273 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
7274 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
7275 rflags |= X86_EFLAGS_TF;
7276 kvm_x86_ops->set_rflags(vcpu, rflags);
7277 kvm_make_request(KVM_REQ_EVENT, vcpu);
7278}
7279EXPORT_SYMBOL_GPL(kvm_set_rflags);
7280
7281void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
7282{
7283 int r;
7284
7285 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
7286 is_error_page(work->page))
7287 return;
7288
7289 r = kvm_mmu_reload(vcpu);
7290 if (unlikely(r))
7291 return;
7292
7293 if (!vcpu->arch.mmu.direct_map &&
7294 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
7295 return;
7296
7297 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
7298}
7299
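/*
 * vcpu->arch.apf.gfns[] is a small open-addressed hash table with linear
 * probing: roundup_pow_of_two(ASYNC_PF_PER_VCPU) slots, with ~0 marking
 * an empty slot.  A gfn is added when a "page not present" async fault is
 * reported to the guest and removed when the page becomes ready, so the
 * two events can be matched up (see below).
 */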
7300static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
7301{
7302 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
7303}
7304
7305static inline u32 kvm_async_pf_next_probe(u32 key)
7306{
7307 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
7308}
7309
7310static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7311{
7312 u32 key = kvm_async_pf_hash_fn(gfn);
7313
7314 while (vcpu->arch.apf.gfns[key] != ~0)
7315 key = kvm_async_pf_next_probe(key);
7316
7317 vcpu->arch.apf.gfns[key] = gfn;
7318}
7319
7320static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
7321{
7322 int i;
7323 u32 key = kvm_async_pf_hash_fn(gfn);
7324
7325 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
7326 (vcpu->arch.apf.gfns[key] != gfn &&
7327 vcpu->arch.apf.gfns[key] != ~0); i++)
7328 key = kvm_async_pf_next_probe(key);
7329
7330 return key;
7331}
7332
7333bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7334{
7335 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
7336}
7337
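/*
 * Delete a gfn from the open-addressed table.  Simply clearing the slot
 * could break later lookups, so subsequent entries are shifted back into
 * the hole when their probe sequence allows it.
 */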
7338static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7339{
7340 u32 i, j, k;
7341
7342 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
7343 while (true) {
7344 vcpu->arch.apf.gfns[i] = ~0;
7345 do {
7346 j = kvm_async_pf_next_probe(j);
7347 if (vcpu->arch.apf.gfns[j] == ~0)
7348 return;
7349 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
   /*
    * Keep scanning while the home slot k of the entry at j lies
    * cyclically in (i, j]; such an entry must not be moved into
    * the hole at i.  The first entry whose home slot falls
    * outside that range is shifted back below.
    */
7355 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
7356 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
7357 i = j;
7358 }
7359}
7360
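/*
 * Write an async-PF reason code into the shared area the guest registered
 * through the async-PF MSR, using the cached guest mapping.
 */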
7361static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
7362{
7363
7364 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
7365 sizeof(val));
7366}
7367
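/*
 * A guest page is not yet present: remember its gfn in the hash table and
 * either halt the vcpu (when the fault cannot be pushed to the guest) or
 * inject a #PF whose address is the async-PF token and whose reason is
 * PAGE_NOT_PRESENT.
 */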
7368void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
7369 struct kvm_async_pf *work)
7370{
7371 struct x86_exception fault;
7372
7373 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
7374 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
7375
7376 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
7377 (vcpu->arch.apf.send_user_only &&
7378 kvm_x86_ops->get_cpl(vcpu) == 0))
7379 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
7380 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
7381 fault.vector = PF_VECTOR;
7382 fault.error_code_valid = true;
7383 fault.error_code = 0;
7384 fault.nested_page_fault = false;
7385 fault.address = work->arch.token;
7386 kvm_inject_page_fault(vcpu, &fault);
7387 }
7388}
7389
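/*
 * The page is ready: drop the gfn from the hash (or use ~0, which the
 * guest treats as a broadcast wakeup, on error), inject the PAGE_READY
 * notification if the guest has async PF enabled, and make the vcpu
 * runnable again.
 */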
7390void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7391 struct kvm_async_pf *work)
7392{
7393 struct x86_exception fault;
7394
7395 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7396 if (is_error_page(work->page))
7397 work->arch.token = ~0;
7398 else
7399 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
7400
7401 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
7402 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
7403 fault.vector = PF_VECTOR;
7404 fault.error_code_valid = true;
7405 fault.error_code = 0;
7406 fault.nested_page_fault = false;
7407 fault.address = work->arch.token;
7408 kvm_inject_page_fault(vcpu, &fault);
7409 }
7410 vcpu->arch.apf.halted = false;
7411 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7412}
7413
7414bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7415{
7416 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
7417 return true;
7418 else
7419 return !kvm_event_needs_reinjection(vcpu) &&
7420 kvm_x86_ops->interrupt_allowed(vcpu);
7421}
7422
7423EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7424EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7425EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
7426EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
7427EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
7428EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
7429EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
7430EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
7431EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
7432EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
7433EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7434EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7435EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
7436