#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/*
 * EFER bits the guest may set: SCE is always allowed, and on 64-bit
 * builds LME/LMA are allowed as well.  Additional bits can be permitted
 * later via kvm_enable_efer_bits().
 */
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

static bool ignore_msrs = false;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);

/*
 * Allowed difference, in parts per million, between the requested guest
 * TSC frequency and the host TSC frequency before TSC scaling or
 * catch-up is required (see kvm_set_tsc_khz()).
 */
static u32 tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

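/*
 * A few MSRs hold the same value for the host and for most guests, so they
 * are switched lazily: guest values are tracked per-CPU here and the host
 * values are only restored from a user-return notifier, i.e. right before
 * the CPU goes back to userspace, instead of on every vmexit.
 */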
#define KVM_NR_SHARED_MSRS 16

struct kvm_shared_msrs_global {
	int nr;
	u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_shared_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "efer_reload", VCPU_STAT(efer_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ NULL }
};

u64 __read_mostly host_xcr0;

int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
	int i;
	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
		vcpu->arch.apf.gfns[i] = ~0;
}

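/*
 * Called from the user-return notifier: restore the host value of every
 * shared MSR whose current (guest) value differs, then unregister the
 * notifier for this CPU.
 */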
static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
	struct kvm_shared_msrs *locals
		= container_of(urn, struct kvm_shared_msrs, urn);
	struct kvm_shared_msr_values *values;

	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
		values = &locals->values[slot];
		if (values->host != values->curr) {
			wrmsrl(shared_msrs_global.msrs[slot], values->host);
			values->curr = values->host;
		}
	}
	locals->registered = false;
	user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
{
	struct kvm_shared_msrs *smsr;
	u64 value;

	smsr = &__get_cpu_var(shared_msrs);
	/*
	 * No locking needed for the read below: shared_msrs_global is only
	 * extended at setup time via kvm_define_shared_msr().
	 */
	if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!");
		return;
	}
	rdmsrl_safe(msr, &value);
	smsr->values[slot].host = value;
	smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
	shared_msrs_global.msrs[slot] = msr;
	/* Make the table update visible before it is consumed elsewhere. */
	smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
	unsigned i;

	for (i = 0; i < shared_msrs_global.nr; ++i)
		shared_msr_update(i, shared_msrs_global.msrs[i]);
}

void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);

	if (((value ^ smsr->values[slot].curr) & mask) == 0)
		return;
	smsr->values[slot].curr = value;
	wrmsrl(shared_msrs_global.msrs[slot], value);
	if (!smsr->registered) {
		smsr->urn.on_user_return = kvm_on_user_return;
		user_return_notifier_register(&smsr->urn);
		smsr->registered = true;
	}
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void *ignore)
{
	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
	kvm_lapic_set_base(vcpu, data);
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
#define EXCPT_PF 2

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

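/*
 * Queue an exception, merging it with an already-pending one when needed:
 * a second contributory exception (or anything other than a benign
 * exception on top of a pending #PF) becomes #DF, and a fault while #DF
 * is pending escalates to a triple-fault request, following the
 * architectural double-fault rules.
 */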
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending) {
	queue:
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.reinject = reinject;
		return;
	}

	/* An exception is already pending; decide how the two combine. */
	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* Fault on top of #DF: triple fault -> shutdown. */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* Generate a double fault. */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
	} else
		/*
		 * Replace the previous exception with the new one, in the
		 * hope that re-executing the instruction will regenerate
		 * the lost exception.
		 */
		goto queue;
}

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err)
		kvm_inject_gp(vcpu, 0);
	else
		kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.cr2 = fault->address;
	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
	else
		vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	atomic_inc(&vcpu->arch.nmi_queued);
	kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

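/*
 * Return true if the guest's CPL is at most required_cpl (i.e. the guest
 * is privileged enough); otherwise queue a #GP(0) and return false.
 */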
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

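/*
 * Read data from guest memory through a specific MMU: @ngfn is first
 * translated with @mmu->translate_gpa() and the resulting frame is read
 * with kvm_read_guest_page().  Returns -EFAULT if the translation fails.
 */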
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
			       void *data, int offset, int len, u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

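/*
 * Load the PAE PDPTEs referenced by cr3 into mmu->pdptrs.  Returns 1 if
 * every present entry is valid (no reserved bits set), 0 otherwise.
 */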
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK|PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if (is_present_gpte(pdpte[i]) &&
		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_dirty);
out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);
459
460static bool pdptrs_changed(struct kvm_vcpu *vcpu)
461{
462 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
463 bool changed = true;
464 int offset;
465 gfn_t gfn;
466 int r;
467
468 if (is_long_mode(vcpu) || !is_pae(vcpu))
469 return false;
470
471 if (!test_bit(VCPU_EXREG_PDPTR,
472 (unsigned long *)&vcpu->arch.regs_avail))
473 return true;
474
475 gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
476 offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
477 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
478 PFERR_USER_MASK | PFERR_WRITE_MASK);
479 if (r < 0)
480 goto out;
481 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
482out:
483
484 return changed;
485}
486
487int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
488{
489 unsigned long old_cr0 = kvm_read_cr0(vcpu);
490 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
491 X86_CR0_CD | X86_CR0_NW;
492
493 cr0 |= X86_CR0_ET;
494
495#ifdef CONFIG_X86_64
496 if (cr0 & 0xffffffff00000000UL)
497 return 1;
498#endif
499
500 cr0 &= ~CR0_RESERVED_BITS;
501
502 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
503 return 1;
504
505 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
506 return 1;
507
508 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
509#ifdef CONFIG_X86_64
510 if ((vcpu->arch.efer & EFER_LME)) {
511 int cs_db, cs_l;
512
513 if (!is_pae(vcpu))
514 return 1;
515 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
516 if (cs_l)
517 return 1;
518 } else
519#endif
520 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
521 kvm_read_cr3(vcpu)))
522 return 1;
523 }
524
525 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
526 return 1;
527
528 kvm_x86_ops->set_cr0(vcpu, cr0);
529
530 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
531 kvm_clear_async_pf_completion_queue(vcpu);
532 kvm_async_pf_hash_reset(vcpu);
533 }
534
535 if ((cr0 ^ old_cr0) & update_bits)
536 kvm_mmu_reset_context(vcpu);
537 return 0;
538}
539EXPORT_SYMBOL_GPL(kvm_set_cr0);
540
541void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
542{
543 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
544}
545EXPORT_SYMBOL_GPL(kvm_lmsw);
546
547int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
548{
549 u64 xcr0;
550
551
552 if (index != XCR_XFEATURE_ENABLED_MASK)
553 return 1;
554 xcr0 = xcr;
555 if (kvm_x86_ops->get_cpl(vcpu) != 0)
556 return 1;
557 if (!(xcr0 & XSTATE_FP))
558 return 1;
559 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
560 return 1;
561 if (xcr0 & ~host_xcr0)
562 return 1;
563 vcpu->arch.xcr0 = xcr0;
564 vcpu->guest_xcr0_loaded = 0;
565 return 0;
566}
567
568int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
569{
570 if (__kvm_set_xcr(vcpu, index, xcr)) {
571 kvm_inject_gp(vcpu, 0);
572 return 1;
573 }
574 return 0;
575}
576EXPORT_SYMBOL_GPL(kvm_set_xcr);
577
578int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
579{
580 unsigned long old_cr4 = kvm_read_cr4(vcpu);
581 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
582 X86_CR4_PAE | X86_CR4_SMEP;
583 if (cr4 & CR4_RESERVED_BITS)
584 return 1;
585
586 if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
587 return 1;
588
589 if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
590 return 1;
591
592 if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS))
593 return 1;
594
595 if (is_long_mode(vcpu)) {
596 if (!(cr4 & X86_CR4_PAE))
597 return 1;
598 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
599 && ((cr4 ^ old_cr4) & pdptr_bits)
600 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
601 kvm_read_cr3(vcpu)))
602 return 1;
603
604 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
605 if (!guest_cpuid_has_pcid(vcpu))
606 return 1;
607
608
609 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
610 return 1;
611 }
612
613 if (kvm_x86_ops->set_cr4(vcpu, cr4))
614 return 1;
615
616 if (((cr4 ^ old_cr4) & pdptr_bits) ||
617 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
618 kvm_mmu_reset_context(vcpu);
619
620 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
621 kvm_update_cpuid(vcpu);
622
623 return 0;
624}
625EXPORT_SYMBOL_GPL(kvm_set_cr4);
626
627int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
628{
629 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
630 kvm_mmu_sync_roots(vcpu);
631 kvm_mmu_flush_tlb(vcpu);
632 return 0;
633 }
634
635 if (is_long_mode(vcpu)) {
636 if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
637 if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
638 return 1;
639 } else
640 if (cr3 & CR3_L_MODE_RESERVED_BITS)
641 return 1;
642 } else {
643 if (is_pae(vcpu)) {
644 if (cr3 & CR3_PAE_RESERVED_BITS)
645 return 1;
646 if (is_paging(vcpu) &&
647 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
648 return 1;
649 }
650
651
652
653
654 }
655
656
657
658
659
660
661
662
663
664
665 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
666 return 1;
667 vcpu->arch.cr3 = cr3;
668 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
669 vcpu->arch.mmu.new_cr3(vcpu);
670 return 0;
671}
672EXPORT_SYMBOL_GPL(kvm_set_cr3);
673
674int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
675{
676 if (cr8 & CR8_RESERVED_BITS)
677 return 1;
678 if (irqchip_in_kernel(vcpu->kvm))
679 kvm_lapic_set_tpr(vcpu, cr8);
680 else
681 vcpu->arch.cr8 = cr8;
682 return 0;
683}
684EXPORT_SYMBOL_GPL(kvm_set_cr8);
685
686unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
687{
688 if (irqchip_in_kernel(vcpu->kvm))
689 return kvm_lapic_get_cr8(vcpu);
690 else
691 return vcpu->arch.cr8;
692}
693EXPORT_SYMBOL_GPL(kvm_get_cr8);
694
695static void kvm_update_dr7(struct kvm_vcpu *vcpu)
696{
697 unsigned long dr7;
698
699 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
700 dr7 = vcpu->arch.guest_debug_dr7;
701 else
702 dr7 = vcpu->arch.dr7;
703 kvm_x86_ops->set_dr7(vcpu, dr7);
704 vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
705}
706
707static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
708{
709 switch (dr) {
710 case 0 ... 3:
711 vcpu->arch.db[dr] = val;
712 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
713 vcpu->arch.eff_db[dr] = val;
714 break;
715 case 4:
716 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
717 return 1;
718
719 case 6:
720 if (val & 0xffffffff00000000ULL)
721 return -1;
722 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
723 break;
724 case 5:
725 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
726 return 1;
727
728 default:
729 if (val & 0xffffffff00000000ULL)
730 return -1;
731 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
732 kvm_update_dr7(vcpu);
733 break;
734 }
735
736 return 0;
737}
738
739int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
740{
741 int res;
742
743 res = __kvm_set_dr(vcpu, dr, val);
744 if (res > 0)
745 kvm_queue_exception(vcpu, UD_VECTOR);
746 else if (res < 0)
747 kvm_inject_gp(vcpu, 0);
748
749 return res;
750}
751EXPORT_SYMBOL_GPL(kvm_set_dr);
752
753static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
754{
755 switch (dr) {
756 case 0 ... 3:
757 *val = vcpu->arch.db[dr];
758 break;
759 case 4:
760 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
761 return 1;
762
763 case 6:
764 *val = vcpu->arch.dr6;
765 break;
766 case 5:
767 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
768 return 1;
769
770 default:
771 *val = vcpu->arch.dr7;
772 break;
773 }
774
775 return 0;
776}
777
778int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
779{
780 if (_kvm_get_dr(vcpu, dr, val)) {
781 kvm_queue_exception(vcpu, UD_VECTOR);
782 return 1;
783 }
784 return 0;
785}
786EXPORT_SYMBOL_GPL(kvm_get_dr);
787
788bool kvm_rdpmc(struct kvm_vcpu *vcpu)
789{
790 u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
791 u64 data;
792 int err;
793
794 err = kvm_pmu_read_pmc(vcpu, ecx, &data);
795 if (err)
796 return err;
797 kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
798 kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
799 return err;
800}
801EXPORT_SYMBOL_GPL(kvm_rdpmc);
802
803
804
805
806
807
808
809
810
811
812#define KVM_SAVE_MSRS_BEGIN 10
813static u32 msrs_to_save[] = {
814 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
815 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
816 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
817 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
818 MSR_KVM_PV_EOI_EN,
819 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
820 MSR_STAR,
821#ifdef CONFIG_X86_64
822 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
823#endif
824 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
825};
826
827static unsigned num_msrs_to_save;
828
829static const u32 emulated_msrs[] = {
830 MSR_IA32_TSCDEADLINE,
831 MSR_IA32_MISC_ENABLE,
832 MSR_IA32_MCG_STATUS,
833 MSR_IA32_MCG_CTL,
834};
835
836static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
837{
838 u64 old_efer = vcpu->arch.efer;
839
840 if (efer & efer_reserved_bits)
841 return 1;
842
843 if (is_paging(vcpu)
844 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
845 return 1;
846
847 if (efer & EFER_FFXSR) {
848 struct kvm_cpuid_entry2 *feat;
849
850 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
851 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
852 return 1;
853 }
854
855 if (efer & EFER_SVME) {
856 struct kvm_cpuid_entry2 *feat;
857
858 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
859 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
860 return 1;
861 }
862
863 efer &= ~EFER_LMA;
864 efer |= vcpu->arch.efer & EFER_LMA;
865
866 kvm_x86_ops->set_efer(vcpu, efer);
867
868 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
869
870
871 if ((efer ^ old_efer) & EFER_NX)
872 kvm_mmu_reset_context(vcpu);
873
874 return 0;
875}
876
877void kvm_enable_efer_bits(u64 mask)
878{
879 efer_reserved_bits &= ~mask;
880}
881EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
882
883
884
885
886
887
888
889int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
890{
891 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
892}
893
894
895
896
897static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
898{
899 return kvm_set_msr(vcpu, index, *data);
900}
901
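/*
 * Publish wall-clock time for the guest.  The version field is bumped to
 * an odd value before the payload is written and back to an even value
 * afterwards, so the guest can detect and retry a torn read.
 */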
902static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
903{
904 int version;
905 int r;
906 struct pvclock_wall_clock wc;
907 struct timespec boot;
908
909 if (!wall_clock)
910 return;
911
912 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
913 if (r)
914 return;
915
916 if (version & 1)
917 ++version;
918
919 ++version;
920
921 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
922
923
924
925
926
927
928
929 getboottime(&boot);
930
931 if (kvm->arch.kvmclock_offset) {
932 struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
933 boot = timespec_sub(boot, ts);
934 }
935 wc.sec = boot.tv_sec;
936 wc.nsec = boot.tv_nsec;
937 wc.version = version;
938
939 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
940
941 version++;
942 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
943}
944
945static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
946{
947 uint32_t quotient, remainder;
948
949
950
951 __asm__ ( "divl %4"
952 : "=a" (quotient), "=d" (remainder)
953 : "0" (0), "1" (dividend), "r" (divisor) );
954 return quotient;
955}
956
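/*
 * Compute a shift and 32-bit fixed-point multiplier that convert a tick
 * count at base_khz into the equivalent count at scaled_khz; used both
 * for the guest's pvclock parameters and for the vCPU's virtual TSC.
 */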
957static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
958 s8 *pshift, u32 *pmultiplier)
959{
960 uint64_t scaled64;
961 int32_t shift = 0;
962 uint64_t tps64;
963 uint32_t tps32;
964
965 tps64 = base_khz * 1000LL;
966 scaled64 = scaled_khz * 1000LL;
967 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
968 tps64 >>= 1;
969 shift--;
970 }
971
972 tps32 = (uint32_t)tps64;
973 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
974 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
975 scaled64 >>= 1;
976 else
977 tps32 <<= 1;
978 shift++;
979 }
980
981 *pshift = shift;
982 *pmultiplier = div_frac(scaled64, tps32);
983
984 pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
985 __func__, base_khz, scaled_khz, shift, *pmultiplier);
986}
987
988static inline u64 get_kernel_ns(void)
989{
990 struct timespec ts;
991
992 WARN_ON(preemptible());
993 ktime_get_ts(&ts);
994 monotonic_to_bootbased(&ts);
995 return timespec_to_ns(&ts);
996}
997
998static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
999unsigned long max_tsc_khz;
1000
1001static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
1002{
1003 return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
1004 vcpu->arch.virtual_tsc_shift);
1005}
1006
1007static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1008{
1009 u64 v = (u64)khz * (1000000 + ppm);
1010 do_div(v, 1000000);
1011 return v;
1012}
1013
1014static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
1015{
1016 u32 thresh_lo, thresh_hi;
1017 int use_scaling = 0;
1018
1019
1020 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
1021 &vcpu->arch.virtual_tsc_shift,
1022 &vcpu->arch.virtual_tsc_mult);
1023 vcpu->arch.virtual_tsc_khz = this_tsc_khz;
1024
1025
1026
1027
1028
1029
1030
1031 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1032 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1033 if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
1034 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
1035 use_scaling = 1;
1036 }
1037 kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
1038}
1039
1040static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1041{
1042 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1043 vcpu->arch.virtual_tsc_mult,
1044 vcpu->arch.virtual_tsc_shift);
1045 tsc += vcpu->arch.this_tsc_write;
1046 return tsc;
1047}
1048
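/*
 * Handle a guest write to the TSC.  A write that lands within about one
 * second of the previous write, at the same virtual TSC frequency, is
 * treated as an attempt to synchronize with other vCPUs and reuses the
 * current TSC offset/generation; anything else starts a new generation.
 * The per-VM tsc_write_lock is taken here.
 */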
1049void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1050{
1051 struct kvm *kvm = vcpu->kvm;
1052 u64 offset, ns, elapsed;
1053 unsigned long flags;
1054 s64 usdiff;
1055
1056 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1057 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1058 ns = get_kernel_ns();
1059 elapsed = ns - kvm->arch.last_tsc_nsec;
1060
1061
1062 usdiff = data - kvm->arch.last_tsc_write;
1063#ifdef CONFIG_X86_64
1064 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
1065#else
1066
1067 asm("idivl %2; xor %%edx, %%edx"
1068 : "=A"(usdiff)
1069 : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
1070#endif
1071 do_div(elapsed, 1000);
1072 usdiff -= elapsed;
1073 if (usdiff < 0)
1074 usdiff = -usdiff;
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086 if (usdiff < USEC_PER_SEC &&
1087 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1088 if (!check_tsc_unstable()) {
1089 offset = kvm->arch.cur_tsc_offset;
1090 pr_debug("kvm: matched tsc offset for %llu\n", data);
1091 } else {
1092 u64 delta = nsec_to_cycles(vcpu, elapsed);
1093 data += delta;
1094 offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
1095 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1096 }
1097 } else {
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107 kvm->arch.cur_tsc_generation++;
1108 kvm->arch.cur_tsc_nsec = ns;
1109 kvm->arch.cur_tsc_write = data;
1110 kvm->arch.cur_tsc_offset = offset;
1111 pr_debug("kvm: new tsc generation %u, clock %llu\n",
1112 kvm->arch.cur_tsc_generation, data);
1113 }
1114
1115
1116
1117
1118
1119 kvm->arch.last_tsc_nsec = ns;
1120 kvm->arch.last_tsc_write = data;
1121 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1122
1123
1124 vcpu->arch.hv_clock.tsc_timestamp = 0;
1125 vcpu->arch.last_guest_tsc = data;
1126
1127
1128 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1129 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1130 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1131
1132 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1133 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1134}
1135
1136EXPORT_SYMBOL_GPL(kvm_write_tsc);
1137
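/*
 * Refresh the guest's kvmclock page: sample the host clock and the guest
 * TSC, apply catch-up if the vCPU is in tsc_catchup mode, and copy the
 * updated pvclock structure into the guest's time page with the version
 * bumped by 2 so the guest sees a consistent snapshot.
 */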
1138static int kvm_guest_time_update(struct kvm_vcpu *v)
1139{
1140 unsigned long flags;
1141 struct kvm_vcpu_arch *vcpu = &v->arch;
1142 void *shared_kaddr;
1143 unsigned long this_tsc_khz;
1144 s64 kernel_ns, max_kernel_ns;
1145 u64 tsc_timestamp;
1146 u8 pvclock_flags;
1147
1148
1149 local_irq_save(flags);
1150 tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
1151 kernel_ns = get_kernel_ns();
1152 this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
1153 if (unlikely(this_tsc_khz == 0)) {
1154 local_irq_restore(flags);
1155 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1156 return 1;
1157 }
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169 if (vcpu->tsc_catchup) {
1170 u64 tsc = compute_guest_tsc(v, kernel_ns);
1171 if (tsc > tsc_timestamp) {
1172 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
1173 tsc_timestamp = tsc;
1174 }
1175 }
1176
1177 local_irq_restore(flags);
1178
1179 if (!vcpu->time_page)
1180 return 0;
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203 max_kernel_ns = 0;
1204 if (vcpu->hv_clock.tsc_timestamp) {
1205 max_kernel_ns = vcpu->last_guest_tsc -
1206 vcpu->hv_clock.tsc_timestamp;
1207 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1208 vcpu->hv_clock.tsc_to_system_mul,
1209 vcpu->hv_clock.tsc_shift);
1210 max_kernel_ns += vcpu->last_kernel_ns;
1211 }
1212
1213 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1214 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1215 &vcpu->hv_clock.tsc_shift,
1216 &vcpu->hv_clock.tsc_to_system_mul);
1217 vcpu->hw_tsc_khz = this_tsc_khz;
1218 }
1219
1220 if (max_kernel_ns > kernel_ns)
1221 kernel_ns = max_kernel_ns;
1222
1223
1224 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1225 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1226 vcpu->last_kernel_ns = kernel_ns;
1227 vcpu->last_guest_tsc = tsc_timestamp;
1228
1229 pvclock_flags = 0;
1230 if (vcpu->pvclock_set_guest_stopped_request) {
1231 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
1232 vcpu->pvclock_set_guest_stopped_request = false;
1233 }
1234
1235 vcpu->hv_clock.flags = pvclock_flags;
1236
1237
1238
1239
1240
1241
1242 vcpu->hv_clock.version += 2;
1243
1244 shared_kaddr = kmap_atomic(vcpu->time_page);
1245
1246 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
1247 sizeof(vcpu->hv_clock));
1248
1249 kunmap_atomic(shared_kaddr);
1250
1251 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
1252 return 0;
1253}
1254
1255static bool msr_mtrr_valid(unsigned msr)
1256{
1257 switch (msr) {
1258 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
1259 case MSR_MTRRfix64K_00000:
1260 case MSR_MTRRfix16K_80000:
1261 case MSR_MTRRfix16K_A0000:
1262 case MSR_MTRRfix4K_C0000:
1263 case MSR_MTRRfix4K_C8000:
1264 case MSR_MTRRfix4K_D0000:
1265 case MSR_MTRRfix4K_D8000:
1266 case MSR_MTRRfix4K_E0000:
1267 case MSR_MTRRfix4K_E8000:
1268 case MSR_MTRRfix4K_F0000:
1269 case MSR_MTRRfix4K_F8000:
1270 case MSR_MTRRdefType:
1271 case MSR_IA32_CR_PAT:
1272 return true;
1273 case 0x2f8:
1274 return true;
1275 }
1276 return false;
1277}
1278
1279static bool valid_pat_type(unsigned t)
1280{
1281 return t < 8 && (1 << t) & 0xf3;
1282}
1283
1284static bool valid_mtrr_type(unsigned t)
1285{
1286 return t < 8 && (1 << t) & 0x73;
1287}
1288
1289static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1290{
1291 int i;
1292
1293 if (!msr_mtrr_valid(msr))
1294 return false;
1295
1296 if (msr == MSR_IA32_CR_PAT) {
1297 for (i = 0; i < 8; i++)
1298 if (!valid_pat_type((data >> (i * 8)) & 0xff))
1299 return false;
1300 return true;
1301 } else if (msr == MSR_MTRRdefType) {
1302 if (data & ~0xcff)
1303 return false;
1304 return valid_mtrr_type(data & 0xff);
1305 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
1306 for (i = 0; i < 8 ; i++)
1307 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
1308 return false;
1309 return true;
1310 }
1311
1312
1313 return valid_mtrr_type(data & 0xff);
1314}
1315
1316static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1317{
1318 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1319
1320 if (!mtrr_valid(vcpu, msr, data))
1321 return 1;
1322
1323 if (msr == MSR_MTRRdefType) {
1324 vcpu->arch.mtrr_state.def_type = data;
1325 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
1326 } else if (msr == MSR_MTRRfix64K_00000)
1327 p[0] = data;
1328 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1329 p[1 + msr - MSR_MTRRfix16K_80000] = data;
1330 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1331 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
1332 else if (msr == MSR_IA32_CR_PAT)
1333 vcpu->arch.pat = data;
1334 else {
1335 int idx, is_mtrr_mask;
1336 u64 *pt;
1337
1338 idx = (msr - 0x200) / 2;
1339 is_mtrr_mask = msr - 0x200 - 2 * idx;
1340 if (!is_mtrr_mask)
1341 pt =
1342 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1343 else
1344 pt =
1345 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1346 *pt = data;
1347 }
1348
1349 kvm_mmu_reset_context(vcpu);
1350 return 0;
1351}
1352
1353static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1354{
1355 u64 mcg_cap = vcpu->arch.mcg_cap;
1356 unsigned bank_num = mcg_cap & 0xff;
1357
1358 switch (msr) {
1359 case MSR_IA32_MCG_STATUS:
1360 vcpu->arch.mcg_status = data;
1361 break;
1362 case MSR_IA32_MCG_CTL:
1363 if (!(mcg_cap & MCG_CTL_P))
1364 return 1;
1365 if (data != 0 && data != ~(u64)0)
1366 return -1;
1367 vcpu->arch.mcg_ctl = data;
1368 break;
1369 default:
1370 if (msr >= MSR_IA32_MC0_CTL &&
1371 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1372 u32 offset = msr - MSR_IA32_MC0_CTL;
1373
1374
1375
1376
1377
1378 if ((offset & 0x3) == 0 &&
1379 data != 0 && (data | (1 << 10)) != ~(u64)0)
1380 return -1;
1381 vcpu->arch.mce_banks[offset] = data;
1382 break;
1383 }
1384 return 1;
1385 }
1386 return 0;
1387}
1388
1389static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
1390{
1391 struct kvm *kvm = vcpu->kvm;
1392 int lm = is_long_mode(vcpu);
1393 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
1394 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
1395 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
1396 : kvm->arch.xen_hvm_config.blob_size_32;
1397 u32 page_num = data & ~PAGE_MASK;
1398 u64 page_addr = data & PAGE_MASK;
1399 u8 *page;
1400 int r;
1401
1402 r = -E2BIG;
1403 if (page_num >= blob_size)
1404 goto out;
1405 r = -ENOMEM;
1406 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
1407 if (IS_ERR(page)) {
1408 r = PTR_ERR(page);
1409 goto out;
1410 }
1411 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
1412 goto out_free;
1413 r = 0;
1414out_free:
1415 kfree(page);
1416out:
1417 return r;
1418}
1419
1420static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1421{
1422 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1423}
1424
1425static bool kvm_hv_msr_partition_wide(u32 msr)
1426{
1427 bool r = false;
1428 switch (msr) {
1429 case HV_X64_MSR_GUEST_OS_ID:
1430 case HV_X64_MSR_HYPERCALL:
1431 r = true;
1432 break;
1433 }
1434
1435 return r;
1436}
1437
1438static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1439{
1440 struct kvm *kvm = vcpu->kvm;
1441
1442 switch (msr) {
1443 case HV_X64_MSR_GUEST_OS_ID:
1444 kvm->arch.hv_guest_os_id = data;
1445
1446 if (!kvm->arch.hv_guest_os_id)
1447 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1448 break;
1449 case HV_X64_MSR_HYPERCALL: {
1450 u64 gfn;
1451 unsigned long addr;
1452 u8 instructions[4];
1453
1454
1455 if (!kvm->arch.hv_guest_os_id)
1456 break;
1457 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1458 kvm->arch.hv_hypercall = data;
1459 break;
1460 }
1461 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1462 addr = gfn_to_hva(kvm, gfn);
1463 if (kvm_is_error_hva(addr))
1464 return 1;
1465 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1466 ((unsigned char *)instructions)[3] = 0xc3;
1467 if (__copy_to_user((void __user *)addr, instructions, 4))
1468 return 1;
1469 kvm->arch.hv_hypercall = data;
1470 break;
1471 }
1472 default:
1473 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1474 "data 0x%llx\n", msr, data);
1475 return 1;
1476 }
1477 return 0;
1478}
1479
1480static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1481{
1482 switch (msr) {
1483 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1484 unsigned long addr;
1485
1486 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1487 vcpu->arch.hv_vapic = data;
1488 break;
1489 }
1490 addr = gfn_to_hva(vcpu->kvm, data >>
1491 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1492 if (kvm_is_error_hva(addr))
1493 return 1;
1494 if (__clear_user((void __user *)addr, PAGE_SIZE))
1495 return 1;
1496 vcpu->arch.hv_vapic = data;
1497 break;
1498 }
1499 case HV_X64_MSR_EOI:
1500 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1501 case HV_X64_MSR_ICR:
1502 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1503 case HV_X64_MSR_TPR:
1504 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1505 default:
1506 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1507 "data 0x%llx\n", msr, data);
1508 return 1;
1509 }
1510
1511 return 0;
1512}
1513
1514static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1515{
1516 gpa_t gpa = data & ~0x3f;
1517
1518
1519 if (data & 0x3c)
1520 return 1;
1521
1522 vcpu->arch.apf.msr_val = data;
1523
1524 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1525 kvm_clear_async_pf_completion_queue(vcpu);
1526 kvm_async_pf_hash_reset(vcpu);
1527 return 0;
1528 }
1529
1530 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
1531 return 1;
1532
1533 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
1534 kvm_async_pf_wakeup_all(vcpu);
1535 return 0;
1536}
1537
1538static void kvmclock_reset(struct kvm_vcpu *vcpu)
1539{
1540 if (vcpu->arch.time_page) {
1541 kvm_release_page_dirty(vcpu->arch.time_page);
1542 vcpu->arch.time_page = NULL;
1543 }
1544}
1545
1546static void accumulate_steal_time(struct kvm_vcpu *vcpu)
1547{
1548 u64 delta;
1549
1550 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1551 return;
1552
1553 delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
1554 vcpu->arch.st.last_steal = current->sched_info.run_delay;
1555 vcpu->arch.st.accum_steal = delta;
1556}
1557
1558static void record_steal_time(struct kvm_vcpu *vcpu)
1559{
1560 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
1561 return;
1562
1563 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1564 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
1565 return;
1566
1567 vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
1568 vcpu->arch.st.steal.version += 2;
1569 vcpu->arch.st.accum_steal = 0;
1570
1571 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
1572 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
1573}
1574
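/*
 * Handle guest WRMSR for MSRs that are common to VMX and SVM.  Returns 0
 * on success and a non-zero value when the write is rejected.
 */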
1575int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1576{
1577 bool pr = false;
1578
1579 switch (msr) {
1580 case MSR_EFER:
1581 return set_efer(vcpu, data);
1582 case MSR_K7_HWCR:
1583 data &= ~(u64)0x40;
1584 data &= ~(u64)0x100;
1585 data &= ~(u64)0x8;
1586 if (data != 0) {
1587 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1588 data);
1589 return 1;
1590 }
1591 break;
1592 case MSR_FAM10H_MMIO_CONF_BASE:
1593 if (data != 0) {
1594 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
1595 "0x%llx\n", data);
1596 return 1;
1597 }
1598 break;
1599 case MSR_AMD64_NB_CFG:
1600 break;
1601 case MSR_IA32_DEBUGCTLMSR:
1602 if (!data) {
1603
1604 break;
1605 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
1606
1607
1608 return 1;
1609 }
1610 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
1611 __func__, data);
1612 break;
1613 case MSR_IA32_UCODE_REV:
1614 case MSR_IA32_UCODE_WRITE:
1615 case MSR_VM_HSAVE_PA:
1616 case MSR_AMD64_PATCH_LOADER:
1617 break;
1618 case 0x200 ... 0x2ff:
1619 return set_msr_mtrr(vcpu, msr, data);
1620 case MSR_IA32_APICBASE:
1621 kvm_set_apic_base(vcpu, data);
1622 break;
1623 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1624 return kvm_x2apic_msr_write(vcpu, msr, data);
1625 case MSR_IA32_TSCDEADLINE:
1626 kvm_set_lapic_tscdeadline_msr(vcpu, data);
1627 break;
1628 case MSR_IA32_MISC_ENABLE:
1629 vcpu->arch.ia32_misc_enable_msr = data;
1630 break;
1631 case MSR_KVM_WALL_CLOCK_NEW:
1632 case MSR_KVM_WALL_CLOCK:
1633 vcpu->kvm->arch.wall_clock = data;
1634 kvm_write_wall_clock(vcpu->kvm, data);
1635 break;
1636 case MSR_KVM_SYSTEM_TIME_NEW:
1637 case MSR_KVM_SYSTEM_TIME: {
1638 kvmclock_reset(vcpu);
1639
1640 vcpu->arch.time = data;
1641 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1642
1643
1644 if (!(data & 1))
1645 break;
1646
1647
1648 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1649
1650 vcpu->arch.time_page =
1651 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
1652
1653 if (is_error_page(vcpu->arch.time_page))
1654 vcpu->arch.time_page = NULL;
1655
1656 break;
1657 }
1658 case MSR_KVM_ASYNC_PF_EN:
1659 if (kvm_pv_enable_async_pf(vcpu, data))
1660 return 1;
1661 break;
1662 case MSR_KVM_STEAL_TIME:
1663
1664 if (unlikely(!sched_info_on()))
1665 return 1;
1666
1667 if (data & KVM_STEAL_RESERVED_MASK)
1668 return 1;
1669
1670 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
1671 data & KVM_STEAL_VALID_BITS))
1672 return 1;
1673
1674 vcpu->arch.st.msr_val = data;
1675
1676 if (!(data & KVM_MSR_ENABLED))
1677 break;
1678
1679 vcpu->arch.st.last_steal = current->sched_info.run_delay;
1680
1681 preempt_disable();
1682 accumulate_steal_time(vcpu);
1683 preempt_enable();
1684
1685 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
1686
1687 break;
1688 case MSR_KVM_PV_EOI_EN:
1689 if (kvm_lapic_enable_pv_eoi(vcpu, data))
1690 return 1;
1691 break;
1692
1693 case MSR_IA32_MCG_CTL:
1694 case MSR_IA32_MCG_STATUS:
1695 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1696 return set_msr_mce(vcpu, msr, data);
1697
1698
1699
1700
1701
1702
1703
1704
1705 case MSR_K7_EVNTSEL0:
1706 case MSR_K7_EVNTSEL1:
1707 case MSR_K7_EVNTSEL2:
1708 case MSR_K7_EVNTSEL3:
1709 if (data != 0)
1710 vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1711 "0x%x data 0x%llx\n", msr, data);
1712 break;
1713
1714
1715
1716 case MSR_K7_PERFCTR0:
1717 case MSR_K7_PERFCTR1:
1718 case MSR_K7_PERFCTR2:
1719 case MSR_K7_PERFCTR3:
1720 vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1721 "0x%x data 0x%llx\n", msr, data);
1722 break;
1723 case MSR_P6_PERFCTR0:
1724 case MSR_P6_PERFCTR1:
1725 pr = true;
1726 case MSR_P6_EVNTSEL0:
1727 case MSR_P6_EVNTSEL1:
1728 if (kvm_pmu_msr(vcpu, msr))
1729 return kvm_pmu_set_msr(vcpu, msr, data);
1730
1731 if (pr || data != 0)
1732 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
1733 "0x%x data 0x%llx\n", msr, data);
1734 break;
1735 case MSR_K7_CLK_CTL:
1736
1737
1738
1739
1740
1741
1742
1743
1744 break;
1745 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1746 if (kvm_hv_msr_partition_wide(msr)) {
1747 int r;
1748 mutex_lock(&vcpu->kvm->lock);
1749 r = set_msr_hyperv_pw(vcpu, msr, data);
1750 mutex_unlock(&vcpu->kvm->lock);
1751 return r;
1752 } else
1753 return set_msr_hyperv(vcpu, msr, data);
1754 break;
1755 case MSR_IA32_BBL_CR_CTL3:
1756
1757
1758
1759 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
1760 break;
1761 case MSR_AMD64_OSVW_ID_LENGTH:
1762 if (!guest_cpuid_has_osvw(vcpu))
1763 return 1;
1764 vcpu->arch.osvw.length = data;
1765 break;
1766 case MSR_AMD64_OSVW_STATUS:
1767 if (!guest_cpuid_has_osvw(vcpu))
1768 return 1;
1769 vcpu->arch.osvw.status = data;
1770 break;
1771 default:
1772 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1773 return xen_hvm_config(vcpu, data);
1774 if (kvm_pmu_msr(vcpu, msr))
1775 return kvm_pmu_set_msr(vcpu, msr, data);
1776 if (!ignore_msrs) {
1777 vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
1778 msr, data);
1779 return 1;
1780 } else {
1781 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
1782 msr, data);
1783 break;
1784 }
1785 }
1786 return 0;
1787}
1788EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1789
1790
1791
1792
1793
1794
1795
1796int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1797{
1798 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1799}
1800
1801static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1802{
1803 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1804
1805 if (!msr_mtrr_valid(msr))
1806 return 1;
1807
1808 if (msr == MSR_MTRRdefType)
1809 *pdata = vcpu->arch.mtrr_state.def_type +
1810 (vcpu->arch.mtrr_state.enabled << 10);
1811 else if (msr == MSR_MTRRfix64K_00000)
1812 *pdata = p[0];
1813 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1814 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
1815 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1816 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
1817 else if (msr == MSR_IA32_CR_PAT)
1818 *pdata = vcpu->arch.pat;
1819 else {
1820 int idx, is_mtrr_mask;
1821 u64 *pt;
1822
1823 idx = (msr - 0x200) / 2;
1824 is_mtrr_mask = msr - 0x200 - 2 * idx;
1825 if (!is_mtrr_mask)
1826 pt =
1827 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1828 else
1829 pt =
1830 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1831 *pdata = *pt;
1832 }
1833
1834 return 0;
1835}
1836
1837static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1838{
1839 u64 data;
1840 u64 mcg_cap = vcpu->arch.mcg_cap;
1841 unsigned bank_num = mcg_cap & 0xff;
1842
1843 switch (msr) {
1844 case MSR_IA32_P5_MC_ADDR:
1845 case MSR_IA32_P5_MC_TYPE:
1846 data = 0;
1847 break;
1848 case MSR_IA32_MCG_CAP:
1849 data = vcpu->arch.mcg_cap;
1850 break;
1851 case MSR_IA32_MCG_CTL:
1852 if (!(mcg_cap & MCG_CTL_P))
1853 return 1;
1854 data = vcpu->arch.mcg_ctl;
1855 break;
1856 case MSR_IA32_MCG_STATUS:
1857 data = vcpu->arch.mcg_status;
1858 break;
1859 default:
1860 if (msr >= MSR_IA32_MC0_CTL &&
1861 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1862 u32 offset = msr - MSR_IA32_MC0_CTL;
1863 data = vcpu->arch.mce_banks[offset];
1864 break;
1865 }
1866 return 1;
1867 }
1868 *pdata = data;
1869 return 0;
1870}
1871
1872static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1873{
1874 u64 data = 0;
1875 struct kvm *kvm = vcpu->kvm;
1876
1877 switch (msr) {
1878 case HV_X64_MSR_GUEST_OS_ID:
1879 data = kvm->arch.hv_guest_os_id;
1880 break;
1881 case HV_X64_MSR_HYPERCALL:
1882 data = kvm->arch.hv_hypercall;
1883 break;
1884 default:
1885 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1886 return 1;
1887 }
1888
1889 *pdata = data;
1890 return 0;
1891}
1892
1893static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1894{
1895 u64 data = 0;
1896
1897 switch (msr) {
1898 case HV_X64_MSR_VP_INDEX: {
1899 int r;
1900 struct kvm_vcpu *v;
1901 kvm_for_each_vcpu(r, v, vcpu->kvm)
1902 if (v == vcpu)
1903 data = r;
1904 break;
1905 }
1906 case HV_X64_MSR_EOI:
1907 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1908 case HV_X64_MSR_ICR:
1909 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1910 case HV_X64_MSR_TPR:
1911 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1912 case HV_X64_MSR_APIC_ASSIST_PAGE:
1913 data = vcpu->arch.hv_vapic;
1914 break;
1915 default:
1916 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1917 return 1;
1918 }
1919 *pdata = data;
1920 return 0;
1921}
1922
1923int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1924{
1925 u64 data;
1926
1927 switch (msr) {
1928 case MSR_IA32_PLATFORM_ID:
1929 case MSR_IA32_EBL_CR_POWERON:
1930 case MSR_IA32_DEBUGCTLMSR:
1931 case MSR_IA32_LASTBRANCHFROMIP:
1932 case MSR_IA32_LASTBRANCHTOIP:
1933 case MSR_IA32_LASTINTFROMIP:
1934 case MSR_IA32_LASTINTTOIP:
1935 case MSR_K8_SYSCFG:
1936 case MSR_K7_HWCR:
1937 case MSR_VM_HSAVE_PA:
1938 case MSR_K7_EVNTSEL0:
1939 case MSR_K7_PERFCTR0:
1940 case MSR_K8_INT_PENDING_MSG:
1941 case MSR_AMD64_NB_CFG:
1942 case MSR_FAM10H_MMIO_CONF_BASE:
1943 data = 0;
1944 break;
1945 case MSR_P6_PERFCTR0:
1946 case MSR_P6_PERFCTR1:
1947 case MSR_P6_EVNTSEL0:
1948 case MSR_P6_EVNTSEL1:
1949 if (kvm_pmu_msr(vcpu, msr))
1950 return kvm_pmu_get_msr(vcpu, msr, pdata);
1951 data = 0;
1952 break;
1953 case MSR_IA32_UCODE_REV:
1954 data = 0x100000000ULL;
1955 break;
1956 case MSR_MTRRcap:
1957 data = 0x500 | KVM_NR_VAR_MTRR;
1958 break;
1959 case 0x200 ... 0x2ff:
1960 return get_msr_mtrr(vcpu, msr, pdata);
1961 case 0xcd:
1962 data = 3;
1963 break;
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975 case MSR_EBC_FREQUENCY_ID:
1976 data = 1 << 24;
1977 break;
1978 case MSR_IA32_APICBASE:
1979 data = kvm_get_apic_base(vcpu);
1980 break;
1981 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1982 return kvm_x2apic_msr_read(vcpu, msr, pdata);
1983 break;
1984 case MSR_IA32_TSCDEADLINE:
1985 data = kvm_get_lapic_tscdeadline_msr(vcpu);
1986 break;
1987 case MSR_IA32_MISC_ENABLE:
1988 data = vcpu->arch.ia32_misc_enable_msr;
1989 break;
1990 case MSR_IA32_PERF_STATUS:
1991
1992 data = 1000ULL;
1993
1994 data |= (((uint64_t)4ULL) << 40);
1995 break;
1996 case MSR_EFER:
1997 data = vcpu->arch.efer;
1998 break;
1999 case MSR_KVM_WALL_CLOCK:
2000 case MSR_KVM_WALL_CLOCK_NEW:
2001 data = vcpu->kvm->arch.wall_clock;
2002 break;
2003 case MSR_KVM_SYSTEM_TIME:
2004 case MSR_KVM_SYSTEM_TIME_NEW:
2005 data = vcpu->arch.time;
2006 break;
2007 case MSR_KVM_ASYNC_PF_EN:
2008 data = vcpu->arch.apf.msr_val;
2009 break;
2010 case MSR_KVM_STEAL_TIME:
2011 data = vcpu->arch.st.msr_val;
2012 break;
2013 case MSR_KVM_PV_EOI_EN:
2014 data = vcpu->arch.pv_eoi.msr_val;
2015 break;
2016 case MSR_IA32_P5_MC_ADDR:
2017 case MSR_IA32_P5_MC_TYPE:
2018 case MSR_IA32_MCG_CAP:
2019 case MSR_IA32_MCG_CTL:
2020 case MSR_IA32_MCG_STATUS:
2021 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2022 return get_msr_mce(vcpu, msr, pdata);
2023 case MSR_K7_CLK_CTL:
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033 data = 0x20000000;
2034 break;
2035 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2036 if (kvm_hv_msr_partition_wide(msr)) {
2037 int r;
2038 mutex_lock(&vcpu->kvm->lock);
2039 r = get_msr_hyperv_pw(vcpu, msr, pdata);
2040 mutex_unlock(&vcpu->kvm->lock);
2041 return r;
2042 } else
2043 return get_msr_hyperv(vcpu, msr, pdata);
2044 break;
2045 case MSR_IA32_BBL_CR_CTL3:
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056 data = 0xbe702111;
2057 break;
2058 case MSR_AMD64_OSVW_ID_LENGTH:
2059 if (!guest_cpuid_has_osvw(vcpu))
2060 return 1;
2061 data = vcpu->arch.osvw.length;
2062 break;
2063 case MSR_AMD64_OSVW_STATUS:
2064 if (!guest_cpuid_has_osvw(vcpu))
2065 return 1;
2066 data = vcpu->arch.osvw.status;
2067 break;
2068 default:
2069 if (kvm_pmu_msr(vcpu, msr))
2070 return kvm_pmu_get_msr(vcpu, msr, pdata);
2071 if (!ignore_msrs) {
2072 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
2073 return 1;
2074 } else {
2075 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
2076 data = 0;
2077 }
2078 break;
2079 }
2080 *pdata = data;
2081 return 0;
2082}
2083EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2084
2085
2086
2087
2088
2089
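/*
 * Read or write a batch of MSRs.  All parameters are kernel addresses;
 * processing stops at the first MSR the do_msr callback rejects, and the
 * number of MSRs handled is returned.
 */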
2090static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2091 struct kvm_msr_entry *entries,
2092 int (*do_msr)(struct kvm_vcpu *vcpu,
2093 unsigned index, u64 *data))
2094{
2095 int i, idx;
2096
2097 idx = srcu_read_lock(&vcpu->kvm->srcu);
2098 for (i = 0; i < msrs->nmsrs; ++i)
2099 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2100 break;
2101 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2102
2103 return i;
2104}
2105
2106
2107
2108
2109
2110
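/*
 * Read or write a batch of MSRs on behalf of userspace: copy the
 * kvm_msrs header and entry array in from user memory, perform the
 * accesses via __msr_io(), and, for reads, copy the results back out.
 */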
2111static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2112 int (*do_msr)(struct kvm_vcpu *vcpu,
2113 unsigned index, u64 *data),
2114 int writeback)
2115{
2116 struct kvm_msrs msrs;
2117 struct kvm_msr_entry *entries;
2118 int r, n;
2119 unsigned size;
2120
2121 r = -EFAULT;
2122 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2123 goto out;
2124
2125 r = -E2BIG;
2126 if (msrs.nmsrs >= MAX_IO_MSRS)
2127 goto out;
2128
2129 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2130 entries = memdup_user(user_msrs->entries, size);
2131 if (IS_ERR(entries)) {
2132 r = PTR_ERR(entries);
2133 goto out;
2134 }
2135
2136 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2137 if (r < 0)
2138 goto out_free;
2139
2140 r = -EFAULT;
2141 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2142 goto out_free;
2143
2144 r = n;
2145
2146out_free:
2147 kfree(entries);
2148out:
2149 return r;
2150}
2151
2152int kvm_dev_ioctl_check_extension(long ext)
2153{
2154 int r;
2155
2156 switch (ext) {
2157 case KVM_CAP_IRQCHIP:
2158 case KVM_CAP_HLT:
2159 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2160 case KVM_CAP_SET_TSS_ADDR:
2161 case KVM_CAP_EXT_CPUID:
2162 case KVM_CAP_CLOCKSOURCE:
2163 case KVM_CAP_PIT:
2164 case KVM_CAP_NOP_IO_DELAY:
2165 case KVM_CAP_MP_STATE:
2166 case KVM_CAP_SYNC_MMU:
2167 case KVM_CAP_USER_NMI:
2168 case KVM_CAP_REINJECT_CONTROL:
2169 case KVM_CAP_IRQ_INJECT_STATUS:
2170 case KVM_CAP_ASSIGN_DEV_IRQ:
2171 case KVM_CAP_IRQFD:
2172 case KVM_CAP_IOEVENTFD:
2173 case KVM_CAP_PIT2:
2174 case KVM_CAP_PIT_STATE2:
2175 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2176 case KVM_CAP_XEN_HVM:
2177 case KVM_CAP_ADJUST_CLOCK:
2178 case KVM_CAP_VCPU_EVENTS:
2179 case KVM_CAP_HYPERV:
2180 case KVM_CAP_HYPERV_VAPIC:
2181 case KVM_CAP_HYPERV_SPIN:
2182 case KVM_CAP_PCI_SEGMENT:
2183 case KVM_CAP_DEBUGREGS:
2184 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2185 case KVM_CAP_XSAVE:
2186 case KVM_CAP_ASYNC_PF:
2187 case KVM_CAP_GET_TSC_KHZ:
2188 case KVM_CAP_PCI_2_3:
2189 case KVM_CAP_KVMCLOCK_CTRL:
2190 case KVM_CAP_READONLY_MEM:
2191 case KVM_CAP_IRQFD_RESAMPLE:
2192 r = 1;
2193 break;
2194 case KVM_CAP_COALESCED_MMIO:
2195 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2196 break;
2197 case KVM_CAP_VAPIC:
2198 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2199 break;
2200 case KVM_CAP_NR_VCPUS:
2201 r = KVM_SOFT_MAX_VCPUS;
2202 break;
2203 case KVM_CAP_MAX_VCPUS:
2204 r = KVM_MAX_VCPUS;
2205 break;
2206 case KVM_CAP_NR_MEMSLOTS:
2207 r = KVM_MEMORY_SLOTS;
2208 break;
2209 case KVM_CAP_PV_MMU:
2210 r = 0;
2211 break;
2212 case KVM_CAP_IOMMU:
2213 r = iommu_present(&pci_bus_type);
2214 break;
2215 case KVM_CAP_MCE:
2216 r = KVM_MAX_MCE_BANKS;
2217 break;
2218 case KVM_CAP_XCRS:
2219 r = cpu_has_xsave;
2220 break;
2221 case KVM_CAP_TSC_CONTROL:
2222 r = kvm_has_tsc_control;
2223 break;
2224 case KVM_CAP_TSC_DEADLINE_TIMER:
2225 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2226 break;
2227 default:
2228 r = 0;
2229 break;
2230 }
2231 return r;
2232
2233}
2234
2235long kvm_arch_dev_ioctl(struct file *filp,
2236 unsigned int ioctl, unsigned long arg)
2237{
2238 void __user *argp = (void __user *)arg;
2239 long r;
2240
2241 switch (ioctl) {
2242 case KVM_GET_MSR_INDEX_LIST: {
2243 struct kvm_msr_list __user *user_msr_list = argp;
2244 struct kvm_msr_list msr_list;
2245 unsigned n;
2246
2247 r = -EFAULT;
2248 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2249 goto out;
2250 n = msr_list.nmsrs;
2251 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2252 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2253 goto out;
2254 r = -E2BIG;
2255 if (n < msr_list.nmsrs)
2256 goto out;
2257 r = -EFAULT;
2258 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2259 num_msrs_to_save * sizeof(u32)))
2260 goto out;
2261 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2262 &emulated_msrs,
2263 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2264 goto out;
2265 r = 0;
2266 break;
2267 }
2268 case KVM_GET_SUPPORTED_CPUID: {
2269 struct kvm_cpuid2 __user *cpuid_arg = argp;
2270 struct kvm_cpuid2 cpuid;
2271
2272 r = -EFAULT;
2273 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2274 goto out;
2275 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
2276 cpuid_arg->entries);
2277 if (r)
2278 goto out;
2279
2280 r = -EFAULT;
2281 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2282 goto out;
2283 r = 0;
2284 break;
2285 }
2286 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2287 u64 mce_cap;
2288
2289 mce_cap = KVM_MCE_CAP_SUPPORTED;
2290 r = -EFAULT;
2291 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2292 goto out;
2293 r = 0;
2294 break;
2295 }
2296 default:
2297 r = -EINVAL;
2298 }
2299out:
2300 return r;
2301}
2302
2303static void wbinvd_ipi(void *garbage)
2304{
2305 wbinvd();
2306}
2307
2308static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2309{
2310 return vcpu->kvm->arch.iommu_domain &&
2311 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2312}
2313
2314void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2315{
2316
2317 if (need_emulate_wbinvd(vcpu)) {
2318 if (kvm_x86_ops->has_wbinvd_exit())
2319 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2320 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2321 smp_call_function_single(vcpu->cpu,
2322 wbinvd_ipi, NULL, 1);
2323 }
2324
2325 kvm_x86_ops->vcpu_load(vcpu, cpu);
2326
2327
2328 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2329 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2330 vcpu->arch.tsc_offset_adjustment = 0;
2331 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2332 }
2333
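	/*
	 * When switching physical CPUs, or whenever the host TSC is
	 * unstable, recheck the TSC: a backwards jump marks the TSC
	 * unstable, and an unstable TSC gets a recomputed offset plus
	 * catchup mode, followed by a clock update.
	 */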
2334 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2335 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2336 native_read_tsc() - vcpu->arch.last_host_tsc;
2337 if (tsc_delta < 0)
2338 mark_tsc_unstable("KVM discovered backwards TSC");
2339 if (check_tsc_unstable()) {
2340 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2341 vcpu->arch.last_guest_tsc);
2342 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2343 vcpu->arch.tsc_catchup = 1;
2344 }
2345 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2346 if (vcpu->cpu != cpu)
2347 kvm_migrate_timers(vcpu);
2348 vcpu->cpu = cpu;
2349 }
2350
2351 accumulate_steal_time(vcpu);
2352 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2353}
2354
2355void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2356{
2357 kvm_x86_ops->vcpu_put(vcpu);
2358 kvm_put_guest_fpu(vcpu);
2359 vcpu->arch.last_host_tsc = native_read_tsc();
2360}
2361
2362static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2363 struct kvm_lapic_state *s)
2364{
2365 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2366
2367 return 0;
2368}
2369
2370static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2371 struct kvm_lapic_state *s)
2372{
2373 kvm_apic_post_state_restore(vcpu, s);
2374 update_cr8_intercept(vcpu);
2375
2376 return 0;
2377}
2378
2379static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2380 struct kvm_interrupt *irq)
2381{
2382 if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
2383 return -EINVAL;
2384 if (irqchip_in_kernel(vcpu->kvm))
2385 return -ENXIO;
2386
2387 kvm_queue_interrupt(vcpu, irq->irq, false);
2388 kvm_make_request(KVM_REQ_EVENT, vcpu);
2389
2390 return 0;
2391}
2392
2393static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2394{
2395 kvm_inject_nmi(vcpu);
2396
2397 return 0;
2398}
2399
2400static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2401 struct kvm_tpr_access_ctl *tac)
2402{
2403 if (tac->flags)
2404 return -EINVAL;
2405 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2406 return 0;
2407}
2408
2409static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2410 u64 mcg_cap)
2411{
2412 int r;
2413 unsigned bank_num = mcg_cap & 0xff, bank;
2414
2415 r = -EINVAL;
2416 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2417 goto out;
2418 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2419 goto out;
2420 r = 0;
2421 vcpu->arch.mcg_cap = mcg_cap;
2422
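	/* Enable all reporting: MCG_CTL and each bank's MCi_CTL start as all ones. */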
2423 if (mcg_cap & MCG_CTL_P)
2424 vcpu->arch.mcg_ctl = ~(u64)0;
2425
2426 for (bank = 0; bank < bank_num; bank++)
2427 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2428out:
2429 return r;
2430}
2431
2432static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2433 struct kvm_x86_mce *mce)
2434{
2435 u64 mcg_cap = vcpu->arch.mcg_cap;
2436 unsigned bank_num = mcg_cap & 0xff;
2437 u64 *banks = vcpu->arch.mce_banks;
2438
2439 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2440 return -EINVAL;
2441
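	/*
	 * Uncorrected errors are silently dropped when MCG_CTL is not all
	 * ones, i.e. when uncorrected-error reporting is disabled.
	 */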
2445 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2446 vcpu->arch.mcg_ctl != ~(u64)0)
2447 return 0;
2448 banks += 4 * mce->bank;
2449
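	/*
	 * Likewise, drop an uncorrected error if this bank's MCi_CTL is
	 * not all ones.
	 */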
2453 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2454 return 0;
2455 if (mce->status & MCI_STATUS_UC) {
2456 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2457 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2458 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2459 return 0;
2460 }
2461 if (banks[1] & MCI_STATUS_VAL)
2462 mce->status |= MCI_STATUS_OVER;
2463 banks[2] = mce->addr;
2464 banks[3] = mce->misc;
2465 vcpu->arch.mcg_status = mce->mcg_status;
2466 banks[1] = mce->status;
2467 kvm_queue_exception(vcpu, MC_VECTOR);
2468 } else if (!(banks[1] & MCI_STATUS_VAL)
2469 || !(banks[1] & MCI_STATUS_UC)) {
2470 if (banks[1] & MCI_STATUS_VAL)
2471 mce->status |= MCI_STATUS_OVER;
2472 banks[2] = mce->addr;
2473 banks[3] = mce->misc;
2474 banks[1] = mce->status;
2475 } else
2476 banks[1] |= MCI_STATUS_OVER;
2477 return 0;
2478}
2479
2480static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2481 struct kvm_vcpu_events *events)
2482{
2483 process_nmi(vcpu);
2484 events->exception.injected =
2485 vcpu->arch.exception.pending &&
2486 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2487 events->exception.nr = vcpu->arch.exception.nr;
2488 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2489 events->exception.pad = 0;
2490 events->exception.error_code = vcpu->arch.exception.error_code;
2491
2492 events->interrupt.injected =
2493 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2494 events->interrupt.nr = vcpu->arch.interrupt.nr;
2495 events->interrupt.soft = 0;
2496 events->interrupt.shadow =
2497 kvm_x86_ops->get_interrupt_shadow(vcpu,
2498 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2499
2500 events->nmi.injected = vcpu->arch.nmi_injected;
2501 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2502 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2503 events->nmi.pad = 0;
2504
2505 events->sipi_vector = vcpu->arch.sipi_vector;
2506
2507 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2508 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2509 | KVM_VCPUEVENT_VALID_SHADOW);
2510 memset(&events->reserved, 0, sizeof(events->reserved));
2511}
2512
2513static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2514 struct kvm_vcpu_events *events)
2515{
2516 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2517 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2518 | KVM_VCPUEVENT_VALID_SHADOW))
2519 return -EINVAL;
2520
2521 process_nmi(vcpu);
2522 vcpu->arch.exception.pending = events->exception.injected;
2523 vcpu->arch.exception.nr = events->exception.nr;
2524 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2525 vcpu->arch.exception.error_code = events->exception.error_code;
2526
2527 vcpu->arch.interrupt.pending = events->interrupt.injected;
2528 vcpu->arch.interrupt.nr = events->interrupt.nr;
2529 vcpu->arch.interrupt.soft = events->interrupt.soft;
2530 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2531 kvm_x86_ops->set_interrupt_shadow(vcpu,
2532 events->interrupt.shadow);
2533
2534 vcpu->arch.nmi_injected = events->nmi.injected;
2535 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2536 vcpu->arch.nmi_pending = events->nmi.pending;
2537 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2538
2539 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2540 vcpu->arch.sipi_vector = events->sipi_vector;
2541
2542 kvm_make_request(KVM_REQ_EVENT, vcpu);
2543
2544 return 0;
2545}
2546
2547static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2548 struct kvm_debugregs *dbgregs)
2549{
2550 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2551 dbgregs->dr6 = vcpu->arch.dr6;
2552 dbgregs->dr7 = vcpu->arch.dr7;
2553 dbgregs->flags = 0;
2554 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2555}
2556
2557static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2558 struct kvm_debugregs *dbgregs)
2559{
2560 if (dbgregs->flags)
2561 return -EINVAL;
2562
2563 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2564 vcpu->arch.dr6 = dbgregs->dr6;
2565 vcpu->arch.dr7 = dbgregs->dr7;
2566
2567 return 0;
2568}
2569
2570static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2571 struct kvm_xsave *guest_xsave)
2572{
2573 if (cpu_has_xsave)
2574 memcpy(guest_xsave->region,
2575 &vcpu->arch.guest_fpu.state->xsave,
2576 xstate_size);
2577 else {
2578 memcpy(guest_xsave->region,
2579 &vcpu->arch.guest_fpu.state->fxsave,
2580 sizeof(struct i387_fxsave_struct));
2581 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2582 XSTATE_FPSSE;
2583 }
2584}
2585
2586static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2587 struct kvm_xsave *guest_xsave)
2588{
2589 u64 xstate_bv =
2590 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2591
2592 if (cpu_has_xsave)
2593 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2594 guest_xsave->region, xstate_size);
2595 else {
2596 if (xstate_bv & ~XSTATE_FPSSE)
2597 return -EINVAL;
2598 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
2599 guest_xsave->region, sizeof(struct i387_fxsave_struct));
2600 }
2601 return 0;
2602}
2603
2604static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
2605 struct kvm_xcrs *guest_xcrs)
2606{
2607 if (!cpu_has_xsave) {
2608 guest_xcrs->nr_xcrs = 0;
2609 return;
2610 }
2611
2612 guest_xcrs->nr_xcrs = 1;
2613 guest_xcrs->flags = 0;
2614 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
2615 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
2616}
2617
2618static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2619 struct kvm_xcrs *guest_xcrs)
2620{
2621 int i, r = 0;
2622
2623 if (!cpu_has_xsave)
2624 return -EINVAL;
2625
2626 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
2627 return -EINVAL;
2628
2629 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
2630
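		/* Only XCR0 is supported. */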
2631 if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
2632 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
2633 guest_xcrs->xcrs[0].value);
2634 break;
2635 }
2636 if (r)
2637 r = -EINVAL;
2638 return r;
2639}
2640
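/*
 * Mark the vcpu's pvclock as stopped by the host so that the next clock
 * update can tell the guest it was paused (used by KVM_KVMCLOCK_CTRL).
 * Fails with -EINVAL if kvmclock is not set up for this vcpu.
 */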
2647static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
2648{
2649 if (!vcpu->arch.time_page)
2650 return -EINVAL;
2651 vcpu->arch.pvclock_set_guest_stopped_request = true;
2652 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2653 return 0;
2654}
2655
2656long kvm_arch_vcpu_ioctl(struct file *filp,
2657 unsigned int ioctl, unsigned long arg)
2658{
2659 struct kvm_vcpu *vcpu = filp->private_data;
2660 void __user *argp = (void __user *)arg;
2661 int r;
2662 union {
2663 struct kvm_lapic_state *lapic;
2664 struct kvm_xsave *xsave;
2665 struct kvm_xcrs *xcrs;
2666 void *buffer;
2667 } u;
2668
2669 u.buffer = NULL;
2670 switch (ioctl) {
2671 case KVM_GET_LAPIC: {
2672 r = -EINVAL;
2673 if (!vcpu->arch.apic)
2674 goto out;
2675 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2676
2677 r = -ENOMEM;
2678 if (!u.lapic)
2679 goto out;
2680 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
2681 if (r)
2682 goto out;
2683 r = -EFAULT;
2684 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
2685 goto out;
2686 r = 0;
2687 break;
2688 }
2689 case KVM_SET_LAPIC: {
2690 r = -EINVAL;
2691 if (!vcpu->arch.apic)
2692 goto out;
2693 u.lapic = memdup_user(argp, sizeof(*u.lapic));
2694 if (IS_ERR(u.lapic)) {
2695 r = PTR_ERR(u.lapic);
2696 goto out;
2697 }
2698
2699 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
2700 if (r)
2701 goto out;
2702 r = 0;
2703 break;
2704 }
2705 case KVM_INTERRUPT: {
2706 struct kvm_interrupt irq;
2707
2708 r = -EFAULT;
2709 if (copy_from_user(&irq, argp, sizeof irq))
2710 goto out;
2711 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2712 if (r)
2713 goto out;
2714 r = 0;
2715 break;
2716 }
2717 case KVM_NMI: {
2718 r = kvm_vcpu_ioctl_nmi(vcpu);
2719 if (r)
2720 goto out;
2721 r = 0;
2722 break;
2723 }
2724 case KVM_SET_CPUID: {
2725 struct kvm_cpuid __user *cpuid_arg = argp;
2726 struct kvm_cpuid cpuid;
2727
2728 r = -EFAULT;
2729 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2730 goto out;
2731 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2732 if (r)
2733 goto out;
2734 break;
2735 }
2736 case KVM_SET_CPUID2: {
2737 struct kvm_cpuid2 __user *cpuid_arg = argp;
2738 struct kvm_cpuid2 cpuid;
2739
2740 r = -EFAULT;
2741 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2742 goto out;
2743 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
2744 cpuid_arg->entries);
2745 if (r)
2746 goto out;
2747 break;
2748 }
2749 case KVM_GET_CPUID2: {
2750 struct kvm_cpuid2 __user *cpuid_arg = argp;
2751 struct kvm_cpuid2 cpuid;
2752
2753 r = -EFAULT;
2754 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2755 goto out;
2756 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
2757 cpuid_arg->entries);
2758 if (r)
2759 goto out;
2760 r = -EFAULT;
2761 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2762 goto out;
2763 r = 0;
2764 break;
2765 }
2766 case KVM_GET_MSRS:
2767 r = msr_io(vcpu, argp, kvm_get_msr, 1);
2768 break;
2769 case KVM_SET_MSRS:
2770 r = msr_io(vcpu, argp, do_set_msr, 0);
2771 break;
2772 case KVM_TPR_ACCESS_REPORTING: {
2773 struct kvm_tpr_access_ctl tac;
2774
2775 r = -EFAULT;
2776 if (copy_from_user(&tac, argp, sizeof tac))
2777 goto out;
2778 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
2779 if (r)
2780 goto out;
2781 r = -EFAULT;
2782 if (copy_to_user(argp, &tac, sizeof tac))
2783 goto out;
2784 r = 0;
2785 break;
	}
2787 case KVM_SET_VAPIC_ADDR: {
2788 struct kvm_vapic_addr va;
2789
2790 r = -EINVAL;
2791 if (!irqchip_in_kernel(vcpu->kvm))
2792 goto out;
2793 r = -EFAULT;
2794 if (copy_from_user(&va, argp, sizeof va))
2795 goto out;
2796 r = 0;
2797 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
2798 break;
2799 }
2800 case KVM_X86_SETUP_MCE: {
2801 u64 mcg_cap;
2802
2803 r = -EFAULT;
2804 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
2805 goto out;
2806 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
2807 break;
2808 }
2809 case KVM_X86_SET_MCE: {
2810 struct kvm_x86_mce mce;
2811
2812 r = -EFAULT;
2813 if (copy_from_user(&mce, argp, sizeof mce))
2814 goto out;
2815 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2816 break;
2817 }
2818 case KVM_GET_VCPU_EVENTS: {
2819 struct kvm_vcpu_events events;
2820
2821 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
2822
2823 r = -EFAULT;
2824 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
2825 break;
2826 r = 0;
2827 break;
2828 }
2829 case KVM_SET_VCPU_EVENTS: {
2830 struct kvm_vcpu_events events;
2831
2832 r = -EFAULT;
2833 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
2834 break;
2835
2836 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2837 break;
2838 }
2839 case KVM_GET_DEBUGREGS: {
2840 struct kvm_debugregs dbgregs;
2841
2842 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
2843
2844 r = -EFAULT;
2845 if (copy_to_user(argp, &dbgregs,
2846 sizeof(struct kvm_debugregs)))
2847 break;
2848 r = 0;
2849 break;
2850 }
2851 case KVM_SET_DEBUGREGS: {
2852 struct kvm_debugregs dbgregs;
2853
2854 r = -EFAULT;
2855 if (copy_from_user(&dbgregs, argp,
2856 sizeof(struct kvm_debugregs)))
2857 break;
2858
2859 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2860 break;
2861 }
2862 case KVM_GET_XSAVE: {
2863 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
2864 r = -ENOMEM;
2865 if (!u.xsave)
2866 break;
2867
2868 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
2869
2870 r = -EFAULT;
2871 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
2872 break;
2873 r = 0;
2874 break;
2875 }
2876 case KVM_SET_XSAVE: {
2877 u.xsave = memdup_user(argp, sizeof(*u.xsave));
2878 if (IS_ERR(u.xsave)) {
2879 r = PTR_ERR(u.xsave);
2880 goto out;
2881 }
2882
2883 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
2884 break;
2885 }
2886 case KVM_GET_XCRS: {
2887 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
2888 r = -ENOMEM;
2889 if (!u.xcrs)
2890 break;
2891
2892 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
2893
2894 r = -EFAULT;
2895 if (copy_to_user(argp, u.xcrs,
2896 sizeof(struct kvm_xcrs)))
2897 break;
2898 r = 0;
2899 break;
2900 }
2901 case KVM_SET_XCRS: {
2902 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
2903 if (IS_ERR(u.xcrs)) {
2904 r = PTR_ERR(u.xcrs);
2905 goto out;
2906 }
2907
2908 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
2909 break;
2910 }
2911 case KVM_SET_TSC_KHZ: {
2912 u32 user_tsc_khz;
2913
2914 r = -EINVAL;
2915 user_tsc_khz = (u32)arg;
2916
2917 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
2918 goto out;
2919
2920 if (user_tsc_khz == 0)
2921 user_tsc_khz = tsc_khz;
2922
2923 kvm_set_tsc_khz(vcpu, user_tsc_khz);
2924
2925 r = 0;
2926 goto out;
2927 }
2928 case KVM_GET_TSC_KHZ: {
2929 r = vcpu->arch.virtual_tsc_khz;
2930 goto out;
2931 }
2932 case KVM_KVMCLOCK_CTRL: {
2933 r = kvm_set_guest_paused(vcpu);
2934 goto out;
2935 }
2936 default:
2937 r = -EINVAL;
2938 }
2939out:
2940 kfree(u.buffer);
2941 return r;
2942}
2943
2944int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2945{
2946 return VM_FAULT_SIGBUS;
2947}
2948
2949static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
2950{
2951 int ret;
2952
2953 if (addr > (unsigned int)(-3 * PAGE_SIZE))
2954 return -1;
2955 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
2956 return ret;
2957}
2958
2959static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
2960 u64 ident_addr)
2961{
2962 kvm->arch.ept_identity_map_addr = ident_addr;
2963 return 0;
2964}
2965
2966static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
2967 u32 kvm_nr_mmu_pages)
2968{
2969 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
2970 return -EINVAL;
2971
2972 mutex_lock(&kvm->slots_lock);
2973 spin_lock(&kvm->mmu_lock);
2974
2975 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
2976 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
2977
2978 spin_unlock(&kvm->mmu_lock);
2979 mutex_unlock(&kvm->slots_lock);
2980 return 0;
2981}
2982
2983static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2984{
2985 return kvm->arch.n_max_mmu_pages;
2986}
2987
2988static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2989{
2990 int r;
2991
2992 r = 0;
2993 switch (chip->chip_id) {
2994 case KVM_IRQCHIP_PIC_MASTER:
2995 memcpy(&chip->chip.pic,
2996 &pic_irqchip(kvm)->pics[0],
2997 sizeof(struct kvm_pic_state));
2998 break;
2999 case KVM_IRQCHIP_PIC_SLAVE:
3000 memcpy(&chip->chip.pic,
3001 &pic_irqchip(kvm)->pics[1],
3002 sizeof(struct kvm_pic_state));
3003 break;
3004 case KVM_IRQCHIP_IOAPIC:
3005 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3006 break;
3007 default:
3008 r = -EINVAL;
3009 break;
3010 }
3011 return r;
3012}
3013
3014static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3015{
3016 int r;
3017
3018 r = 0;
3019 switch (chip->chip_id) {
3020 case KVM_IRQCHIP_PIC_MASTER:
3021 spin_lock(&pic_irqchip(kvm)->lock);
3022 memcpy(&pic_irqchip(kvm)->pics[0],
3023 &chip->chip.pic,
3024 sizeof(struct kvm_pic_state));
3025 spin_unlock(&pic_irqchip(kvm)->lock);
3026 break;
3027 case KVM_IRQCHIP_PIC_SLAVE:
3028 spin_lock(&pic_irqchip(kvm)->lock);
3029 memcpy(&pic_irqchip(kvm)->pics[1],
3030 &chip->chip.pic,
3031 sizeof(struct kvm_pic_state));
3032 spin_unlock(&pic_irqchip(kvm)->lock);
3033 break;
3034 case KVM_IRQCHIP_IOAPIC:
3035 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3036 break;
3037 default:
3038 r = -EINVAL;
3039 break;
3040 }
3041 kvm_pic_update_irq(pic_irqchip(kvm));
3042 return r;
3043}
3044
3045static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3046{
3047 int r = 0;
3048
3049 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3050 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3051 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3052 return r;
3053}
3054
3055static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3056{
3057 int r = 0;
3058
3059 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3060 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3061 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3062 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3063 return r;
3064}
3065
3066static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3067{
3068 int r = 0;
3069
3070 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3071 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3072 sizeof(ps->channels));
3073 ps->flags = kvm->arch.vpit->pit_state.flags;
3074 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3075 memset(&ps->reserved, 0, sizeof(ps->reserved));
3076 return r;
3077}
3078
3079static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3080{
3081 int r = 0, start = 0;
3082 u32 prev_legacy, cur_legacy;
3083 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3084 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3085 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3086 if (!prev_legacy && cur_legacy)
3087 start = 1;
3088 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3089 sizeof(kvm->arch.vpit->pit_state.channels));
3090 kvm->arch.vpit->pit_state.flags = ps->flags;
3091 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3092 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3093 return r;
3094}
3095
3096static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3097 struct kvm_reinject_control *control)
3098{
3099 if (!kvm->arch.vpit)
3100 return -ENXIO;
3101 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3102 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3103 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3104 return 0;
3105}
3106
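/*
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 *
 * For each word of the dirty bitmap:
 *   1. Atomically snapshot the word and clear it.
 *   2. Write-protect the corresponding guest pages.
 *   3. Flush remote TLBs if anything was dirty.
 *   4. Copy the snapshot out to userspace.
 */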
3126int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3127{
3128 int r;
3129 struct kvm_memory_slot *memslot;
3130 unsigned long n, i;
3131 unsigned long *dirty_bitmap;
3132 unsigned long *dirty_bitmap_buffer;
3133 bool is_dirty = false;
3134
3135 mutex_lock(&kvm->slots_lock);
3136
3137 r = -EINVAL;
3138 if (log->slot >= KVM_MEMORY_SLOTS)
3139 goto out;
3140
3141 memslot = id_to_memslot(kvm->memslots, log->slot);
3142
3143 dirty_bitmap = memslot->dirty_bitmap;
3144 r = -ENOENT;
3145 if (!dirty_bitmap)
3146 goto out;
3147
3148 n = kvm_dirty_bitmap_bytes(memslot);
3149
3150 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3151 memset(dirty_bitmap_buffer, 0, n);
3152
3153 spin_lock(&kvm->mmu_lock);
3154
3155 for (i = 0; i < n / sizeof(long); i++) {
3156 unsigned long mask;
3157 gfn_t offset;
3158
3159 if (!dirty_bitmap[i])
3160 continue;
3161
3162 is_dirty = true;
3163
3164 mask = xchg(&dirty_bitmap[i], 0);
3165 dirty_bitmap_buffer[i] = mask;
3166
3167 offset = i * BITS_PER_LONG;
3168 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3169 }
3170 if (is_dirty)
3171 kvm_flush_remote_tlbs(kvm);
3172
3173 spin_unlock(&kvm->mmu_lock);
3174
3175 r = -EFAULT;
3176 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3177 goto out;
3178
3179 r = 0;
3180out:
3181 mutex_unlock(&kvm->slots_lock);
3182 return r;
3183}
3184
3185int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
3186{
3187 if (!irqchip_in_kernel(kvm))
3188 return -ENXIO;
3189
3190 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3191 irq_event->irq, irq_event->level);
3192 return 0;
3193}
3194
3195long kvm_arch_vm_ioctl(struct file *filp,
3196 unsigned int ioctl, unsigned long arg)
3197{
3198 struct kvm *kvm = filp->private_data;
3199 void __user *argp = (void __user *)arg;
3200 int r = -ENOTTY;
3201
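	/* The differently sized ioctl payloads below share one stack slot. */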
3206 union {
3207 struct kvm_pit_state ps;
3208 struct kvm_pit_state2 ps2;
3209 struct kvm_pit_config pit_config;
3210 } u;
3211
3212 switch (ioctl) {
3213 case KVM_SET_TSS_ADDR:
3214 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3215 if (r < 0)
3216 goto out;
3217 break;
3218 case KVM_SET_IDENTITY_MAP_ADDR: {
3219 u64 ident_addr;
3220
3221 r = -EFAULT;
3222 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3223 goto out;
3224 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3225 if (r < 0)
3226 goto out;
3227 break;
3228 }
3229 case KVM_SET_NR_MMU_PAGES:
3230 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3231 if (r)
3232 goto out;
3233 break;
3234 case KVM_GET_NR_MMU_PAGES:
3235 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3236 break;
3237 case KVM_CREATE_IRQCHIP: {
3238 struct kvm_pic *vpic;
3239
3240 mutex_lock(&kvm->lock);
3241 r = -EEXIST;
3242 if (kvm->arch.vpic)
3243 goto create_irqchip_unlock;
3244 r = -EINVAL;
3245 if (atomic_read(&kvm->online_vcpus))
3246 goto create_irqchip_unlock;
3247 r = -ENOMEM;
3248 vpic = kvm_create_pic(kvm);
3249 if (vpic) {
3250 r = kvm_ioapic_init(kvm);
3251 if (r) {
3252 mutex_lock(&kvm->slots_lock);
3253 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3254 &vpic->dev_master);
3255 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3256 &vpic->dev_slave);
3257 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3258 &vpic->dev_eclr);
3259 mutex_unlock(&kvm->slots_lock);
3260 kfree(vpic);
3261 goto create_irqchip_unlock;
3262 }
3263 } else
3264 goto create_irqchip_unlock;
3265 smp_wmb();
3266 kvm->arch.vpic = vpic;
3267 smp_wmb();
3268 r = kvm_setup_default_irq_routing(kvm);
3269 if (r) {
3270 mutex_lock(&kvm->slots_lock);
3271 mutex_lock(&kvm->irq_lock);
3272 kvm_ioapic_destroy(kvm);
3273 kvm_destroy_pic(kvm);
3274 mutex_unlock(&kvm->irq_lock);
3275 mutex_unlock(&kvm->slots_lock);
3276 }
3277 create_irqchip_unlock:
3278 mutex_unlock(&kvm->lock);
3279 break;
3280 }
3281 case KVM_CREATE_PIT:
3282 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3283 goto create_pit;
3284 case KVM_CREATE_PIT2:
3285 r = -EFAULT;
3286 if (copy_from_user(&u.pit_config, argp,
3287 sizeof(struct kvm_pit_config)))
3288 goto out;
3289 create_pit:
3290 mutex_lock(&kvm->slots_lock);
3291 r = -EEXIST;
3292 if (kvm->arch.vpit)
3293 goto create_pit_unlock;
3294 r = -ENOMEM;
3295 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3296 if (kvm->arch.vpit)
3297 r = 0;
3298 create_pit_unlock:
3299 mutex_unlock(&kvm->slots_lock);
3300 break;
3301 case KVM_GET_IRQCHIP: {
3302
3303 struct kvm_irqchip *chip;
3304
3305 chip = memdup_user(argp, sizeof(*chip));
3306 if (IS_ERR(chip)) {
3307 r = PTR_ERR(chip);
3308 goto out;
3309 }
3310
3311 r = -ENXIO;
3312 if (!irqchip_in_kernel(kvm))
3313 goto get_irqchip_out;
3314 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3315 if (r)
3316 goto get_irqchip_out;
3317 r = -EFAULT;
3318 if (copy_to_user(argp, chip, sizeof *chip))
3319 goto get_irqchip_out;
3320 r = 0;
3321 get_irqchip_out:
3322 kfree(chip);
3323 if (r)
3324 goto out;
3325 break;
3326 }
3327 case KVM_SET_IRQCHIP: {
3328
3329 struct kvm_irqchip *chip;
3330
3331 chip = memdup_user(argp, sizeof(*chip));
3332 if (IS_ERR(chip)) {
3333 r = PTR_ERR(chip);
3334 goto out;
3335 }
3336
3337 r = -ENXIO;
3338 if (!irqchip_in_kernel(kvm))
3339 goto set_irqchip_out;
3340 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3341 if (r)
3342 goto set_irqchip_out;
3343 r = 0;
3344 set_irqchip_out:
3345 kfree(chip);
3346 if (r)
3347 goto out;
3348 break;
3349 }
3350 case KVM_GET_PIT: {
3351 r = -EFAULT;
3352 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3353 goto out;
3354 r = -ENXIO;
3355 if (!kvm->arch.vpit)
3356 goto out;
3357 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3358 if (r)
3359 goto out;
3360 r = -EFAULT;
3361 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3362 goto out;
3363 r = 0;
3364 break;
3365 }
3366 case KVM_SET_PIT: {
3367 r = -EFAULT;
3368 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3369 goto out;
3370 r = -ENXIO;
3371 if (!kvm->arch.vpit)
3372 goto out;
3373 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3374 if (r)
3375 goto out;
3376 r = 0;
3377 break;
3378 }
3379 case KVM_GET_PIT2: {
3380 r = -ENXIO;
3381 if (!kvm->arch.vpit)
3382 goto out;
3383 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3384 if (r)
3385 goto out;
3386 r = -EFAULT;
3387 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3388 goto out;
3389 r = 0;
3390 break;
3391 }
3392 case KVM_SET_PIT2: {
3393 r = -EFAULT;
3394 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3395 goto out;
3396 r = -ENXIO;
3397 if (!kvm->arch.vpit)
3398 goto out;
3399 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3400 if (r)
3401 goto out;
3402 r = 0;
3403 break;
3404 }
3405 case KVM_REINJECT_CONTROL: {
3406 struct kvm_reinject_control control;
3407 r = -EFAULT;
3408 if (copy_from_user(&control, argp, sizeof(control)))
3409 goto out;
3410 r = kvm_vm_ioctl_reinject(kvm, &control);
3411 if (r)
3412 goto out;
3413 r = 0;
3414 break;
3415 }
3416 case KVM_XEN_HVM_CONFIG: {
3417 r = -EFAULT;
3418 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3419 sizeof(struct kvm_xen_hvm_config)))
3420 goto out;
3421 r = -EINVAL;
3422 if (kvm->arch.xen_hvm_config.flags)
3423 goto out;
3424 r = 0;
3425 break;
3426 }
3427 case KVM_SET_CLOCK: {
3428 struct kvm_clock_data user_ns;
3429 u64 now_ns;
3430 s64 delta;
3431
3432 r = -EFAULT;
3433 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3434 goto out;
3435
3436 r = -EINVAL;
3437 if (user_ns.flags)
3438 goto out;
3439
3440 r = 0;
3441 local_irq_disable();
3442 now_ns = get_kernel_ns();
3443 delta = user_ns.clock - now_ns;
3444 local_irq_enable();
3445 kvm->arch.kvmclock_offset = delta;
3446 break;
3447 }
3448 case KVM_GET_CLOCK: {
3449 struct kvm_clock_data user_ns;
3450 u64 now_ns;
3451
3452 local_irq_disable();
3453 now_ns = get_kernel_ns();
3454 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3455 local_irq_enable();
3456 user_ns.flags = 0;
3457 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3458
3459 r = -EFAULT;
3460 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3461 goto out;
3462 r = 0;
3463 break;
3464 }
3465
3466 default:
3467 ;
3468 }
3469out:
3470 return r;
3471}
3472
3473static void kvm_init_msr_list(void)
3474{
3475 u32 dummy[2];
3476 unsigned i, j;
3477
3478
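	/*
	 * Skip the KVM-specific MSRs at the head of the list and drop any
	 * remaining MSR that the host does not implement.
	 */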
3479 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3480 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3481 continue;
3482 if (j < i)
3483 msrs_to_save[j] = msrs_to_save[i];
3484 j++;
3485 }
3486 num_msrs_to_save = j;
3487}
3488
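/*
 * Hand an MMIO write to the in-kernel APIC or the MMIO bus, at most
 * 8 bytes at a time; returns the number of bytes handled in the kernel.
 */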
3489static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3490 const void *v)
3491{
3492 int handled = 0;
3493 int n;
3494
3495 do {
3496 n = min(len, 8);
3497 if (!(vcpu->arch.apic &&
3498 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3499 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3500 break;
3501 handled += n;
3502 addr += n;
3503 len -= n;
3504 v += n;
3505 } while (len);
3506
3507 return handled;
3508}
3509
3510static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3511{
3512 int handled = 0;
3513 int n;
3514
3515 do {
3516 n = min(len, 8);
3517 if (!(vcpu->arch.apic &&
3518 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3519 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3520 break;
3521 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3522 handled += n;
3523 addr += n;
3524 len -= n;
3525 v += n;
3526 } while (len);
3527
3528 return handled;
3529}
3530
3531static void kvm_set_segment(struct kvm_vcpu *vcpu,
3532 struct kvm_segment *var, int seg)
3533{
3534 kvm_x86_ops->set_segment(vcpu, var, seg);
3535}
3536
3537void kvm_get_segment(struct kvm_vcpu *vcpu,
3538 struct kvm_segment *var, int seg)
3539{
3540 kvm_x86_ops->get_segment(vcpu, var, seg);
3541}
3542
3543gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3544{
3545 gpa_t t_gpa;
3546 struct x86_exception exception;
3547
3548 BUG_ON(!mmu_is_nested(vcpu));
3549
3550
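	/* Nested translations are always treated as user-mode accesses. */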
3551 access |= PFERR_USER_MASK;
3552 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3553
3554 return t_gpa;
3555}
3556
3557gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3558 struct x86_exception *exception)
3559{
3560 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3561 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3562}
3563
3564 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3565 struct x86_exception *exception)
3566{
3567 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3568 access |= PFERR_FETCH_MASK;
3569 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3570}
3571
3572gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3573 struct x86_exception *exception)
3574{
3575 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3576 access |= PFERR_WRITE_MASK;
3577 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3578}
3579
3580
3581gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3582 struct x86_exception *exception)
3583{
3584 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3585}
3586
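/*
 * Copy from guest virtual memory page by page: translate each chunk,
 * propagate a fault if it is unmapped, otherwise read it from the guest.
 */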
3587static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3588 struct kvm_vcpu *vcpu, u32 access,
3589 struct x86_exception *exception)
3590{
3591 void *data = val;
3592 int r = X86EMUL_CONTINUE;
3593
3594 while (bytes) {
3595 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3596 exception);
3597 unsigned offset = addr & (PAGE_SIZE-1);
3598 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3599 int ret;
3600
3601 if (gpa == UNMAPPED_GVA)
3602 return X86EMUL_PROPAGATE_FAULT;
3603 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3604 if (ret < 0) {
3605 r = X86EMUL_IO_NEEDED;
3606 goto out;
3607 }
3608
3609 bytes -= toread;
3610 data += toread;
3611 addr += toread;
3612 }
3613out:
3614 return r;
3615}
3616
3617
3618static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3619 gva_t addr, void *val, unsigned int bytes,
3620 struct x86_exception *exception)
3621{
3622 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3623 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3624
3625 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3626 access | PFERR_FETCH_MASK,
3627 exception);
3628}
3629
3630int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
3631 gva_t addr, void *val, unsigned int bytes,
3632 struct x86_exception *exception)
3633{
3634 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3635 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3636
3637 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3638 exception);
3639}
3640EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
3641
3642static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3643 gva_t addr, void *val, unsigned int bytes,
3644 struct x86_exception *exception)
3645{
3646 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3647 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
3648}
3649
3650int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3651 gva_t addr, void *val,
3652 unsigned int bytes,
3653 struct x86_exception *exception)
3654{
3655 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3656 void *data = val;
3657 int r = X86EMUL_CONTINUE;
3658
3659 while (bytes) {
3660 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
3661 PFERR_WRITE_MASK,
3662 exception);
3663 unsigned offset = addr & (PAGE_SIZE-1);
3664 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3665 int ret;
3666
3667 if (gpa == UNMAPPED_GVA)
3668 return X86EMUL_PROPAGATE_FAULT;
3669 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3670 if (ret < 0) {
3671 r = X86EMUL_IO_NEEDED;
3672 goto out;
3673 }
3674
3675 bytes -= towrite;
3676 data += towrite;
3677 addr += towrite;
3678 }
3679out:
3680 return r;
3681}
3682EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
3683
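/*
 * Translate a gva for an emulated memory access.  Returns -1 on a
 * translation fault, 1 if the access must be handled as MMIO, and 0 if
 * it targets ordinary guest memory.
 */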
3684static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
3685 gpa_t *gpa, struct x86_exception *exception,
3686 bool write)
3687{
3688 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
3689 | (write ? PFERR_WRITE_MASK : 0);
3690
3691 if (vcpu_match_mmio_gva(vcpu, gva)
3692 && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
3693 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
3694 (gva & (PAGE_SIZE - 1));
3695 trace_vcpu_match_mmio(gva, *gpa, write, false);
3696 return 1;
3697 }
3698
3699 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3700
3701 if (*gpa == UNMAPPED_GVA)
3702 return -1;
3703
3704
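	/* Accesses to the APIC base page always take the MMIO path. */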
3705 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3706 return 1;
3707
3708 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
3709 trace_vcpu_match_mmio(gva, *gpa, write, true);
3710 return 1;
3711 }
3712
3713 return 0;
3714}
3715
3716int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
3717 const void *val, int bytes)
3718{
3719 int ret;
3720
3721 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
3722 if (ret < 0)
3723 return 0;
3724 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
3725 return 1;
3726}
3727
3728struct read_write_emulator_ops {
3729 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
3730 int bytes);
3731 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
3732 void *val, int bytes);
3733 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
3734 int bytes, void *val);
3735 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
3736 void *val, int bytes);
3737 bool write;
3738};
3739
3740static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
3741{
3742 if (vcpu->mmio_read_completed) {
3743 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
3744 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
3745 vcpu->mmio_read_completed = 0;
3746 return 1;
3747 }
3748
3749 return 0;
3750}
3751
3752static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
3753 void *val, int bytes)
3754{
3755 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
3756}
3757
3758static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
3759 void *val, int bytes)
3760{
3761 return emulator_write_phys(vcpu, gpa, val, bytes);
3762}
3763
3764static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
3765{
3766 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
3767 return vcpu_mmio_write(vcpu, gpa, bytes, val);
3768}
3769
3770static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
3771 void *val, int bytes)
3772{
3773 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3774 return X86EMUL_IO_NEEDED;
3775}
3776
3777static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
3778 void *val, int bytes)
3779{
3780 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
3781
3782 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
3783 return X86EMUL_CONTINUE;
3784}
3785
3786static const struct read_write_emulator_ops read_emultor = {
3787 .read_write_prepare = read_prepare,
3788 .read_write_emulate = read_emulate,
3789 .read_write_mmio = vcpu_mmio_read,
3790 .read_write_exit_mmio = read_exit_mmio,
3791};
3792
3793static const struct read_write_emulator_ops write_emultor = {
3794 .read_write_emulate = write_emulate,
3795 .read_write_mmio = write_mmio,
3796 .read_write_exit_mmio = write_exit_mmio,
3797 .write = true,
3798};
3799
3800static int emulator_read_write_onepage(unsigned long addr, void *val,
3801 unsigned int bytes,
3802 struct x86_exception *exception,
3803 struct kvm_vcpu *vcpu,
3804 const struct read_write_emulator_ops *ops)
3805{
3806 gpa_t gpa;
3807 int handled, ret;
3808 bool write = ops->write;
3809 struct kvm_mmio_fragment *frag;
3810
3811 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
3812
3813 if (ret < 0)
3814 return X86EMUL_PROPAGATE_FAULT;
3815
3816
3817 if (ret)
3818 goto mmio;
3819
3820 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
3821 return X86EMUL_CONTINUE;
3822
3823mmio:
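	/* Let an in-kernel device handle as much of the MMIO access as it can. */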
3827 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
3828 if (handled == bytes)
3829 return X86EMUL_CONTINUE;
3830
3831 gpa += handled;
3832 bytes -= handled;
3833 val += handled;
3834
3835 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
3836 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
3837 frag->gpa = gpa;
3838 frag->data = val;
3839 frag->len = bytes;
3840 return X86EMUL_CONTINUE;
3841}
3842
3843int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
3844 void *val, unsigned int bytes,
3845 struct x86_exception *exception,
3846 const struct read_write_emulator_ops *ops)
3847{
3848 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3849 gpa_t gpa;
3850 int rc;
3851
3852 if (ops->read_write_prepare &&
3853 ops->read_write_prepare(vcpu, val, bytes))
3854 return X86EMUL_CONTINUE;
3855
3856 vcpu->mmio_nr_fragments = 0;
3857
3858
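	/* Split accesses that cross a page boundary and handle the first page now. */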
3859 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3860 int now;
3861
3862 now = -addr & ~PAGE_MASK;
3863 rc = emulator_read_write_onepage(addr, val, now, exception,
3864 vcpu, ops);
3865
3866 if (rc != X86EMUL_CONTINUE)
3867 return rc;
3868 addr += now;
3869 val += now;
3870 bytes -= now;
3871 }
3872
3873 rc = emulator_read_write_onepage(addr, val, bytes, exception,
3874 vcpu, ops);
3875 if (rc != X86EMUL_CONTINUE)
3876 return rc;
3877
3878 if (!vcpu->mmio_nr_fragments)
3879 return rc;
3880
3881 gpa = vcpu->mmio_fragments[0].gpa;
3882
3883 vcpu->mmio_needed = 1;
3884 vcpu->mmio_cur_fragment = 0;
3885
3886 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
3887 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
3888 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3889 vcpu->run->mmio.phys_addr = gpa;
3890
3891 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
3892}
3893
3894static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
3895 unsigned long addr,
3896 void *val,
3897 unsigned int bytes,
3898 struct x86_exception *exception)
3899{
3900 return emulator_read_write(ctxt, addr, val, bytes,
3901 exception, &read_emultor);
3902}
3903
3904int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
3905 unsigned long addr,
3906 const void *val,
3907 unsigned int bytes,
3908 struct x86_exception *exception)
3909{
3910 return emulator_read_write(ctxt, addr, (void *)val, bytes,
3911 exception, &write_emultor);
3912}
3913
3914#define CMPXCHG_TYPE(t, ptr, old, new) \
3915 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
3916
3917#ifdef CONFIG_X86_64
3918# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
3919#else
3920# define CMPXCHG64(ptr, old, new) \
3921 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3922#endif
3923
3924static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
3925 unsigned long addr,
3926 const void *old,
3927 const void *new,
3928 unsigned int bytes,
3929 struct x86_exception *exception)
3930{
3931 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3932 gpa_t gpa;
3933 struct page *page;
3934 char *kaddr;
3935 bool exchanged;
3936
3937
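	/* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */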
3938 if (bytes > 8 || (bytes & (bytes - 1)))
3939 goto emul_write;
3940
3941 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3942
3943 if (gpa == UNMAPPED_GVA ||
3944 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3945 goto emul_write;
3946
3947 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3948 goto emul_write;
3949
3950 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3951 if (is_error_page(page))
3952 goto emul_write;
3953
3954 kaddr = kmap_atomic(page);
3955 kaddr += offset_in_page(gpa);
3956 switch (bytes) {
3957 case 1:
3958 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3959 break;
3960 case 2:
3961 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
3962 break;
3963 case 4:
3964 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
3965 break;
3966 case 8:
3967 exchanged = CMPXCHG64(kaddr, old, new);
3968 break;
3969 default:
3970 BUG();
3971 }
3972 kunmap_atomic(kaddr);
3973 kvm_release_page_dirty(page);
3974
3975 if (!exchanged)
3976 return X86EMUL_CMPXCHG_FAILED;
3977
3978 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
3979
3980 return X86EMUL_CONTINUE;
3981
3982emul_write:
3983 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3984
3985 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
3986}
3987
3988static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3989{
3990
3991 int r;
3992
3993 if (vcpu->arch.pio.in)
3994 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3995 vcpu->arch.pio.size, pd);
3996 else
3997 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3998 vcpu->arch.pio.port, vcpu->arch.pio.size,
3999 pd);
4000 return r;
4001}
4002
4003static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4004 unsigned short port, void *val,
4005 unsigned int count, bool in)
4006{
4007 trace_kvm_pio(!in, port, size, count);
4008
4009 vcpu->arch.pio.port = port;
4010 vcpu->arch.pio.in = in;
4011 vcpu->arch.pio.count = count;
4012 vcpu->arch.pio.size = size;
4013
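	/*
	 * If an in-kernel device claims the port the PIO completes here;
	 * otherwise describe a KVM_EXIT_IO exit for userspace to handle.
	 */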
4014 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4015 vcpu->arch.pio.count = 0;
4016 return 1;
4017 }
4018
4019 vcpu->run->exit_reason = KVM_EXIT_IO;
4020 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4021 vcpu->run->io.size = size;
4022 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4023 vcpu->run->io.count = count;
4024 vcpu->run->io.port = port;
4025
4026 return 0;
4027}
4028
4029static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4030 int size, unsigned short port, void *val,
4031 unsigned int count)
4032{
4033 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4034 int ret;
4035
4036 if (vcpu->arch.pio.count)
4037 goto data_avail;
4038
4039 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4040 if (ret) {
4041data_avail:
4042 memcpy(val, vcpu->arch.pio_data, size * count);
4043 vcpu->arch.pio.count = 0;
4044 return 1;
4045 }
4046
4047 return 0;
4048}
4049
4050static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4051 int size, unsigned short port,
4052 const void *val, unsigned int count)
4053{
4054 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4055
4056 memcpy(vcpu->arch.pio_data, val, size * count);
4057 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4058}
4059
4060static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4061{
4062 return kvm_x86_ops->get_segment_base(vcpu, seg);
4063}
4064
4065static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4066{
4067 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4068}
4069
4070int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4071{
4072 if (!need_emulate_wbinvd(vcpu))
4073 return X86EMUL_CONTINUE;
4074
4075 if (kvm_x86_ops->has_wbinvd_exit()) {
4076 int cpu = get_cpu();
4077
4078 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4079 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4080 wbinvd_ipi, NULL, 1);
4081 put_cpu();
4082 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4083 } else
4084 wbinvd();
4085 return X86EMUL_CONTINUE;
4086}
4087EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4088
4089static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4090{
4091 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4092}
4093
4094int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4095{
4096 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4097}
4098
4099int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4100{
4101
4102 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4103}
4104
4105static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4106{
4107 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4108}
4109
4110static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4111{
4112 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4113 unsigned long value;
4114
4115 switch (cr) {
4116 case 0:
4117 value = kvm_read_cr0(vcpu);
4118 break;
4119 case 2:
4120 value = vcpu->arch.cr2;
4121 break;
4122 case 3:
4123 value = kvm_read_cr3(vcpu);
4124 break;
4125 case 4:
4126 value = kvm_read_cr4(vcpu);
4127 break;
4128 case 8:
4129 value = kvm_get_cr8(vcpu);
4130 break;
4131 default:
4132 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4133 return 0;
4134 }
4135
4136 return value;
4137}
4138
4139static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4140{
4141 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4142 int res = 0;
4143
4144 switch (cr) {
4145 case 0:
4146 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4147 break;
4148 case 2:
4149 vcpu->arch.cr2 = val;
4150 break;
4151 case 3:
4152 res = kvm_set_cr3(vcpu, val);
4153 break;
4154 case 4:
4155 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4156 break;
4157 case 8:
4158 res = kvm_set_cr8(vcpu, val);
4159 break;
4160 default:
4161 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4162 res = -1;
4163 }
4164
4165 return res;
4166}
4167
4168static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4169{
4170 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4171}
4172
4173static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4174{
4175 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4176}
4177
4178static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4179{
4180 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4181}
4182
4183static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4184{
4185 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4186}
4187
4188static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4189{
4190 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4191}
4192
4193static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4194{
4195 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4196}
4197
4198static unsigned long emulator_get_cached_segment_base(
4199 struct x86_emulate_ctxt *ctxt, int seg)
4200{
4201 return get_segment_base(emul_to_vcpu(ctxt), seg);
4202}
4203
4204static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4205 struct desc_struct *desc, u32 *base3,
4206 int seg)
4207{
4208 struct kvm_segment var;
4209
4210 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4211 *selector = var.selector;
4212
4213 if (var.unusable)
4214 return false;
4215
4216 if (var.g)
4217 var.limit >>= 12;
4218 set_desc_limit(desc, var.limit);
4219 set_desc_base(desc, (unsigned long)var.base);
4220#ifdef CONFIG_X86_64
4221 if (base3)
4222 *base3 = var.base >> 32;
4223#endif
4224 desc->type = var.type;
4225 desc->s = var.s;
4226 desc->dpl = var.dpl;
4227 desc->p = var.present;
4228 desc->avl = var.avl;
4229 desc->l = var.l;
4230 desc->d = var.db;
4231 desc->g = var.g;
4232
4233 return true;
4234}
4235
4236static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4237 struct desc_struct *desc, u32 base3,
4238 int seg)
4239{
4240 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4241 struct kvm_segment var;
4242
4243 var.selector = selector;
4244 var.base = get_desc_base(desc);
4245#ifdef CONFIG_X86_64
4246 var.base |= ((u64)base3) << 32;
4247#endif
4248 var.limit = get_desc_limit(desc);
4249 if (desc->g)
4250 var.limit = (var.limit << 12) | 0xfff;
4251 var.type = desc->type;
4252 var.present = desc->p;
4253 var.dpl = desc->dpl;
4254 var.db = desc->d;
4255 var.s = desc->s;
4256 var.l = desc->l;
4257 var.g = desc->g;
4258 var.avl = desc->avl;
4260 var.unusable = !var.present;
4261 var.padding = 0;
4262
4263 kvm_set_segment(vcpu, &var, seg);
4264 return;
4265}
4266
4267static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4268 u32 msr_index, u64 *pdata)
4269{
4270 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4271}
4272
4273static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4274 u32 msr_index, u64 data)
4275{
4276 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
4277}
4278
4279static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4280 u32 pmc, u64 *pdata)
4281{
4282 return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
4283}
4284
4285static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4286{
4287 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4288}
4289
4290static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4291{
4292 preempt_disable();
4293 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
4294
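	/* Clear CR0.TS so the emulator's FPU accesses do not fault with #NM. */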
4298 clts();
4299}
4300
4301static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4302{
4303 preempt_enable();
4304}
4305
4306static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4307 struct x86_instruction_info *info,
4308 enum x86_intercept_stage stage)
4309{
4310 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4311}
4312
4313static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4314 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4315{
4316 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4317}
4318
4319static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4320{
4321 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4322}
4323
4324static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4325{
4326 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4327}
4328
4329static const struct x86_emulate_ops emulate_ops = {
4330 .read_gpr = emulator_read_gpr,
4331 .write_gpr = emulator_write_gpr,
4332 .read_std = kvm_read_guest_virt_system,
4333 .write_std = kvm_write_guest_virt_system,
4334 .fetch = kvm_fetch_guest_virt,
4335 .read_emulated = emulator_read_emulated,
4336 .write_emulated = emulator_write_emulated,
4337 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4338 .invlpg = emulator_invlpg,
4339 .pio_in_emulated = emulator_pio_in_emulated,
4340 .pio_out_emulated = emulator_pio_out_emulated,
4341 .get_segment = emulator_get_segment,
4342 .set_segment = emulator_set_segment,
4343 .get_cached_segment_base = emulator_get_cached_segment_base,
4344 .get_gdt = emulator_get_gdt,
4345 .get_idt = emulator_get_idt,
4346 .set_gdt = emulator_set_gdt,
4347 .set_idt = emulator_set_idt,
4348 .get_cr = emulator_get_cr,
4349 .set_cr = emulator_set_cr,
4350 .set_rflags = emulator_set_rflags,
4351 .cpl = emulator_get_cpl,
4352 .get_dr = emulator_get_dr,
4353 .set_dr = emulator_set_dr,
4354 .set_msr = emulator_set_msr,
4355 .get_msr = emulator_get_msr,
4356 .read_pmc = emulator_read_pmc,
4357 .halt = emulator_halt,
4358 .wbinvd = emulator_wbinvd,
4359 .fix_hypercall = emulator_fix_hypercall,
4360 .get_fpu = emulator_get_fpu,
4361 .put_fpu = emulator_put_fpu,
4362 .intercept = emulator_intercept,
4363 .get_cpuid = emulator_get_cpuid,
4364};
4365
4366static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4367{
4368 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4369
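	/*
	 * Only update the hardware interrupt shadow when the requested
	 * bits are not already set; this keeps e.g. an sti; sti sequence
	 * from extending the shadow.
	 */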
4376 if (!(int_shadow & mask))
4377 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4378}
4379
4380static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4381{
4382 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4383 if (ctxt->exception.vector == PF_VECTOR)
4384 kvm_propagate_fault(vcpu, &ctxt->exception);
4385 else if (ctxt->exception.error_code_valid)
4386 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4387 ctxt->exception.error_code);
4388 else
4389 kvm_queue_exception(vcpu, ctxt->exception.vector);
4390}
4391
4392static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4393{
4394 memset(&ctxt->twobyte, 0,
4395 (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
4396
4397 ctxt->fetch.start = 0;
4398 ctxt->fetch.end = 0;
4399 ctxt->io_read.pos = 0;
4400 ctxt->io_read.end = 0;
4401 ctxt->mem_read.pos = 0;
4402 ctxt->mem_read.end = 0;
4403}
4404
4405static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4406{
4407 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4408 int cs_db, cs_l;
4409
4410 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4411
4412 ctxt->eflags = kvm_get_rflags(vcpu);
4413 ctxt->eip = kvm_rip_read(vcpu);
4414 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4415 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4416 cs_l ? X86EMUL_MODE_PROT64 :
4417 cs_db ? X86EMUL_MODE_PROT32 :
4418 X86EMUL_MODE_PROT16;
4419 ctxt->guest_mode = is_guest_mode(vcpu);
4420
4421 init_decode_cache(ctxt);
4422 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4423}
4424
4425int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4426{
4427 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4428 int ret;
4429
4430 init_emulate_ctxt(vcpu);
4431
4432 ctxt->op_bytes = 2;
4433 ctxt->ad_bytes = 2;
4434 ctxt->_eip = ctxt->eip + inc_eip;
4435 ret = emulate_int_real(ctxt, irq);
4436
4437 if (ret != X86EMUL_CONTINUE)
4438 return EMULATE_FAIL;
4439
4440 ctxt->eip = ctxt->_eip;
4441 kvm_rip_write(vcpu, ctxt->eip);
4442 kvm_set_rflags(vcpu, ctxt->eflags);
4443
4444 if (irq == NMI_VECTOR)
4445 vcpu->arch.nmi_pending = 0;
4446 else
4447 vcpu->arch.interrupt.pending = false;
4448
4449 return EMULATE_DONE;
4450}
4451EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4452
4453static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4454{
4455 int r = EMULATE_DONE;
4456
4457 ++vcpu->stat.insn_emulation_fail;
4458 trace_kvm_emulate_insn_failed(vcpu);
4459 if (!is_guest_mode(vcpu)) {
4460 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4461 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4462 vcpu->run->internal.ndata = 0;
4463 r = EMULATE_FAIL;
4464 }
4465 kvm_queue_exception(vcpu, UD_VECTOR);
4466
4467 return r;
4468}
4469
4470static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4471{
4472 gpa_t gpa;
4473 pfn_t pfn;
4474
4475 if (tdp_enabled)
4476 return false;
4477
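	/*
	 * If the failing access touched a shadowed guest page table,
	 * unprotect that page and re-enter the guest so the CPU can
	 * execute the instruction itself.
	 */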
4483 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4484 return true;
4485
4486 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4487
4488 if (gpa == UNMAPPED_GVA)
4489 return true;
4490
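	/*
	 * If the target gpa is backed by an ordinary page, the emulation
	 * failure was not due to MMIO, so re-entering the guest may let
	 * the instruction succeed unemulated.
	 */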
4497 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4498 if (!is_error_pfn(pfn)) {
4499 kvm_release_pfn_clean(pfn);
4500 return true;
4501 }
4502
4503 return false;
4504}
4505
4506static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4507 unsigned long cr2, int emulation_type)
4508{
4509 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4510 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
4511
4512 last_retry_eip = vcpu->arch.last_retry_eip;
4513 last_retry_addr = vcpu->arch.last_retry_addr;
4514
	/*
	 * A #PF raised by a non-page-table-writing instruction usually
	 * means the write hit a shadowed (write-protected) guest page
	 * table; unprotecting that page and retrying lets the CPU execute
	 * the instruction directly.  The last retried eip/address pair is
	 * cached so that hitting the very same fault twice bails out
	 * instead of retrying forever.
	 */
4528 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
4529
4530 if (!(emulation_type & EMULTYPE_RETRY))
4531 return false;
4532
4533 if (x86_page_table_writing_insn(ctxt))
4534 return false;
4535
4536 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
4537 return false;
4538
4539 vcpu->arch.last_retry_eip = ctxt->eip;
4540 vcpu->arch.last_retry_addr = cr2;
4541
4542 if (!vcpu->arch.mmu.direct_map)
4543 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4544
4545 kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4546
4547 return true;
4548}
4549
4550static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
4551static int complete_emulated_pio(struct kvm_vcpu *vcpu);
4552
4553int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4554 unsigned long cr2,
4555 int emulation_type,
4556 void *insn,
4557 int insn_len)
4558{
4559 int r;
4560 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4561 bool writeback = true;
4562
4563 kvm_clear_exception_queue(vcpu);
4564
4565 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
4566 init_emulate_ctxt(vcpu);
4567 ctxt->interruptibility = 0;
4568 ctxt->have_exception = false;
4569 ctxt->perm_ok = false;
4570
4571 ctxt->only_vendor_specific_insn
4572 = emulation_type & EMULTYPE_TRAP_UD;
4573
4574 r = x86_decode_insn(ctxt, insn, insn_len);
4575
4576 trace_kvm_emulate_insn_start(vcpu);
4577 ++vcpu->stat.insn_emulation;
4578 if (r != EMULATION_OK) {
4579 if (emulation_type & EMULTYPE_TRAP_UD)
4580 return EMULATE_FAIL;
4581 if (reexecute_instruction(vcpu, cr2))
4582 return EMULATE_DONE;
4583 if (emulation_type & EMULTYPE_SKIP)
4584 return EMULATE_FAIL;
4585 return handle_emulation_failure(vcpu);
4586 }
4587 }
4588
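	/*
	 * EMULTYPE_SKIP only advances RIP past the instruction that was
	 * just decoded, without executing it; the caller has already
	 * handled its effects.
	 */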
4589 if (emulation_type & EMULTYPE_SKIP) {
4590 kvm_rip_write(vcpu, ctxt->_eip);
4591 return EMULATE_DONE;
4592 }
4593
4594 if (retry_instruction(ctxt, cr2, emulation_type))
4595 return EMULATE_DONE;
4596
	/*
	 * If the vcpu registers were changed behind the emulator's back
	 * (e.g. by KVM_SET_REGS), drop its cached copies so they are
	 * re-read from the vcpu.
	 */
4599 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
4600 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4601 emulator_invalidate_register_cache(ctxt);
4602 }
4603
4604restart:
4605 r = x86_emulate_insn(ctxt);
4606
4607 if (r == EMULATION_INTERCEPTED)
4608 return EMULATE_DONE;
4609
4610 if (r == EMULATION_FAILED) {
4611 if (reexecute_instruction(vcpu, cr2))
4612 return EMULATE_DONE;
4613
4614 return handle_emulation_failure(vcpu);
4615 }
4616
4617 if (ctxt->have_exception) {
4618 inject_emulated_exception(vcpu);
4619 r = EMULATE_DONE;
4620 } else if (vcpu->arch.pio.count) {
4621 if (!vcpu->arch.pio.in)
4622 vcpu->arch.pio.count = 0;
4623 else {
4624 writeback = false;
4625 vcpu->arch.complete_userspace_io = complete_emulated_pio;
4626 }
4627 r = EMULATE_DO_MMIO;
4628 } else if (vcpu->mmio_needed) {
4629 if (!vcpu->mmio_is_write)
4630 writeback = false;
4631 r = EMULATE_DO_MMIO;
4632 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
4633 } else if (r == EMULATION_RESTART)
4634 goto restart;
4635 else
4636 r = EMULATE_DONE;
4637
4638 if (writeback) {
4639 toggle_interruptibility(vcpu, ctxt->interruptibility);
4640 kvm_set_rflags(vcpu, ctxt->eflags);
4641 kvm_make_request(KVM_REQ_EVENT, vcpu);
4642 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
4643 kvm_rip_write(vcpu, ctxt->eip);
4644 } else
4645 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
4646
4647 return r;
4648}
4649EXPORT_SYMBOL_GPL(x86_emulate_instruction);
4650
4651int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
4652{
4653 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
4654 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
4655 size, port, &val, 1);
4656
4657 vcpu->arch.pio.count = 0;
4658 return ret;
4659}
4660EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4661
4662static void tsc_bad(void *info)
4663{
4664 __this_cpu_write(cpu_tsc_khz, 0);
4665}
4666
4667static void tsc_khz_changed(void *data)
4668{
4669 struct cpufreq_freqs *freq = data;
4670 unsigned long khz = 0;
4671
4672 if (data)
4673 khz = freq->new;
4674 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4675 khz = cpufreq_quick_get(raw_smp_processor_id());
4676 if (!khz)
4677 khz = tsc_khz;
4678 __this_cpu_write(cpu_tsc_khz, khz);
4679}
4680
4681static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
4682 void *data)
4683{
4684 struct cpufreq_freqs *freq = data;
4685 struct kvm *kvm;
4686 struct kvm_vcpu *vcpu;
4687 int i, send_ipi = 0;
4688
	/*
	 * Update cpu_tsc_khz at the point in the transition that keeps the
	 * guest from ever computing a time that goes backwards: act on
	 * PRECHANGE only for frequency increases and on POSTCHANGE only
	 * for decreases, ignoring the other half of each notification pair.
	 */
4728 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
4729 return 0;
4730 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
4731 return 0;
4732
4733 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
4734
4735 raw_spin_lock(&kvm_lock);
4736 list_for_each_entry(kvm, &vm_list, vm_list) {
4737 kvm_for_each_vcpu(i, vcpu, kvm) {
4738 if (vcpu->cpu != freq->cpu)
4739 continue;
4740 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4741 if (vcpu->cpu != smp_processor_id())
4742 send_ipi = 1;
4743 }
4744 }
4745 raw_spin_unlock(&kvm_lock);
4746
4747 if (freq->old < freq->new && send_ipi) {
		/*
		 * The frequency went up and a vcpu bound to the changing CPU
		 * is running remotely: repeat the update via IPI so that CPU
		 * picks up the new rate before its guest can observe time
		 * going backwards.
		 */
4760 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
4761 }
4762 return 0;
4763}
4764
4765static struct notifier_block kvmclock_cpufreq_notifier_block = {
4766 .notifier_call = kvmclock_cpufreq_notifier
4767};
4768
4769static int kvmclock_cpu_notifier(struct notifier_block *nfb,
4770 unsigned long action, void *hcpu)
4771{
4772 unsigned int cpu = (unsigned long)hcpu;
4773
4774 switch (action) {
4775 case CPU_ONLINE:
4776 case CPU_DOWN_FAILED:
4777 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
4778 break;
4779 case CPU_DOWN_PREPARE:
4780 smp_call_function_single(cpu, tsc_bad, NULL, 1);
4781 break;
4782 }
4783 return NOTIFY_OK;
4784}
4785
4786static struct notifier_block kvmclock_cpu_notifier_block = {
4787 .notifier_call = kvmclock_cpu_notifier,
4788 .priority = -INT_MAX
4789};
4790
4791static void kvm_timer_init(void)
4792{
4793 int cpu;
4794
4795 max_tsc_khz = tsc_khz;
4796 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
4797 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
4798#ifdef CONFIG_CPU_FREQ
4799 struct cpufreq_policy policy;
4800 memset(&policy, 0, sizeof(policy));
4801 cpu = get_cpu();
4802 cpufreq_get_policy(&policy, cpu);
4803 if (policy.cpuinfo.max_freq)
4804 max_tsc_khz = policy.cpuinfo.max_freq;
4805 put_cpu();
4806#endif
4807 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4808 CPUFREQ_TRANSITION_NOTIFIER);
4809 }
4810 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
4811 for_each_online_cpu(cpu)
4812 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
4813}
4814
4815static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
4816
4817int kvm_is_in_guest(void)
4818{
4819 return __this_cpu_read(current_vcpu) != NULL;
4820}
4821
4822static int kvm_is_user_mode(void)
4823{
4824 int user_mode = 3;
4825
4826 if (__this_cpu_read(current_vcpu))
4827 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
4828
4829 return user_mode != 0;
4830}
4831
4832static unsigned long kvm_get_guest_ip(void)
4833{
4834 unsigned long ip = 0;
4835
4836 if (__this_cpu_read(current_vcpu))
4837 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
4838
4839 return ip;
4840}
4841
4842static struct perf_guest_info_callbacks kvm_guest_cbs = {
4843 .is_in_guest = kvm_is_in_guest,
4844 .is_user_mode = kvm_is_user_mode,
4845 .get_guest_ip = kvm_get_guest_ip,
4846};
4847
4848void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
4849{
4850 __this_cpu_write(current_vcpu, vcpu);
4851}
4852EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
4853
4854void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
4855{
4856 __this_cpu_write(current_vcpu, NULL);
4857}
4858EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
4859
4860static void kvm_set_mmio_spte_mask(void)
4861{
4862 u64 mask;
4863 int maxphyaddr = boot_cpu_data.x86_phys_bits;
4864
	/*
	 * Set the reserved bits and the present bit of a paging-structure
	 * entry so an MMIO access faults with PFERR_RSVD set, which the
	 * MMU recognizes as an MMIO page fault.
	 */
4869 mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;
4870 mask |= 1ull;
4871
4872#ifdef CONFIG_X86_64
	/*
	 * With a 52-bit physical address width there is no reserved bit
	 * left to abuse, so clear the present bit instead and fall back to
	 * ordinary MMIO handling.
	 */
4877 if (maxphyaddr == 52)
4878 mask &= ~1ull;
4879#endif
4880
4881 kvm_mmu_set_mmio_spte_mask(mask);
4882}
4883
4884int kvm_arch_init(void *opaque)
4885{
4886 int r;
4887 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
4888
4889 if (kvm_x86_ops) {
4890 printk(KERN_ERR "kvm: already loaded the other module\n");
4891 r = -EEXIST;
4892 goto out;
4893 }
4894
4895 if (!ops->cpu_has_kvm_support()) {
4896 printk(KERN_ERR "kvm: no hardware support\n");
4897 r = -EOPNOTSUPP;
4898 goto out;
4899 }
4900 if (ops->disabled_by_bios()) {
4901 printk(KERN_ERR "kvm: disabled by bios\n");
4902 r = -EOPNOTSUPP;
4903 goto out;
4904 }
4905
4906 r = kvm_mmu_module_init();
4907 if (r)
4908 goto out;
4909
4910 kvm_set_mmio_spte_mask();
4911 kvm_init_msr_list();
4912
4913 kvm_x86_ops = ops;
4914 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
4915 PT_DIRTY_MASK, PT64_NX_MASK, 0);
4916
4917 kvm_timer_init();
4918
4919 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4920
4921 if (cpu_has_xsave)
4922 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4923
4924 kvm_lapic_init();
4925 return 0;
4926
4927out:
4928 return r;
4929}
4930
4931void kvm_arch_exit(void)
4932{
4933 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
4934
4935 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4936 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4937 CPUFREQ_TRANSITION_NOTIFIER);
4938 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
4939 kvm_x86_ops = NULL;
4940 kvm_mmu_module_exit();
4941}
4942
4943int kvm_emulate_halt(struct kvm_vcpu *vcpu)
4944{
4945 ++vcpu->stat.halt_exits;
4946 if (irqchip_in_kernel(vcpu->kvm)) {
4947 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
4948 return 1;
4949 } else {
4950 vcpu->run->exit_reason = KVM_EXIT_HLT;
4951 return 0;
4952 }
4953}
4954EXPORT_SYMBOL_GPL(kvm_emulate_halt);
4955
4956int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
4957{
4958 u64 param, ingpa, outgpa, ret;
4959 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
4960 bool fast, longmode;
4961 int cs_db, cs_l;
4962
	/*
	 * Hyper-V hypercalls raise #UD when issued from CPL > 0 or outside
	 * protected mode.
	 */
4967 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
4968 kvm_queue_exception(vcpu, UD_VECTOR);
4969 return 0;
4970 }
4971
4972 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4973 longmode = is_long_mode(vcpu) && cs_l == 1;
4974
4975 if (!longmode) {
4976 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
4977 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
4978 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
4979 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
4980 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
4981 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
4982 }
4983#ifdef CONFIG_X86_64
4984 else {
4985 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
4986 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
4987 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
4988 }
4989#endif
4990
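	/*
	 * Hypercall input layout, matching the extraction below: bits 15:0
	 * carry the call code, bit 16 the fast-call flag, bits 43:32 the
	 * rep count and bits 59:48 the rep start index.
	 */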
4991 code = param & 0xffff;
4992 fast = (param >> 16) & 0x1;
4993 rep_cnt = (param >> 32) & 0xfff;
4994 rep_idx = (param >> 48) & 0xfff;
4995
4996 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
4997
4998 switch (code) {
4999 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5000 kvm_vcpu_on_spin(vcpu);
5001 break;
5002 default:
5003 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5004 break;
5005 }
5006
5007 ret = res | (((u64)rep_done & 0xfff) << 32);
5008 if (longmode) {
5009 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5010 } else {
5011 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5012 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5013 }
5014
5015 return 1;
5016}
5017
5018int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5019{
5020 unsigned long nr, a0, a1, a2, a3, ret;
5021 int r = 1;
5022
5023 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5024 return kvm_hv_hypercall(vcpu);
5025
5026 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5027 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5028 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5029 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5030 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5031
5032 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5033
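	/* 32-bit guests pass and receive only the low 32 bits of each value. */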
5034 if (!is_long_mode(vcpu)) {
5035 nr &= 0xFFFFFFFF;
5036 a0 &= 0xFFFFFFFF;
5037 a1 &= 0xFFFFFFFF;
5038 a2 &= 0xFFFFFFFF;
5039 a3 &= 0xFFFFFFFF;
5040 }
5041
5042 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5043 ret = -KVM_EPERM;
5044 goto out;
5045 }
5046
5047 switch (nr) {
5048 case KVM_HC_VAPIC_POLL_IRQ:
5049 ret = 0;
5050 break;
5051 default:
5052 ret = -KVM_ENOSYS;
5053 break;
5054 }
5055out:
5056 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5057 ++vcpu->stat.hypercalls;
5058 return r;
5059}
5060EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
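/*
 * For reference: a guest issues these hypercalls by putting the number in
 * RAX and up to four arguments in RBX, RCX, RDX and RSI (matching the
 * register reads above), executing VMCALL/VMMCALL, and reading the result
 * from RAX.  A minimal guest-side sketch (illustrative only, not the
 * in-tree kvm_para.h helpers):
 *
 *	static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
 *	{
 *		long ret;
 *		asm volatile("vmcall" : "=a"(ret) : "a"(nr), "b"(p1) : "memory");
 *		return ret;
 *	}
 */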
5061
5062int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5063{
5064 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5065 char instruction[3];
5066 unsigned long rip = kvm_rip_read(vcpu);
5067
	/*
	 * Zap the shadow pages before patching so no vcpu keeps executing
	 * through a stale mapping of the instruction being rewritten.
	 */
5073 kvm_mmu_zap_all(vcpu->kvm);
5074
5075 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5076
5077 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5078}
5079
/*
 * Check whether userspace asked for an interrupt-window exit and whether an
 * interrupt can actually be injected now (window open, nothing pending).
 */
5086static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5087{
5088 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5089 vcpu->run->request_interrupt_window &&
5090 kvm_arch_interrupt_allowed(vcpu));
5091}
5092
5093static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5094{
5095 struct kvm_run *kvm_run = vcpu->run;
5096
5097 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5098 kvm_run->cr8 = kvm_get_cr8(vcpu);
5099 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5100 if (irqchip_in_kernel(vcpu->kvm))
5101 kvm_run->ready_for_interrupt_injection = 1;
5102 else
5103 kvm_run->ready_for_interrupt_injection =
5104 kvm_arch_interrupt_allowed(vcpu) &&
5105 !kvm_cpu_has_interrupt(vcpu) &&
5106 !kvm_event_needs_reinjection(vcpu);
5107}
5108
5109static int vapic_enter(struct kvm_vcpu *vcpu)
5110{
5111 struct kvm_lapic *apic = vcpu->arch.apic;
5112 struct page *page;
5113
5114 if (!apic || !apic->vapic_addr)
5115 return 0;
5116
5117 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5118 if (is_error_page(page))
5119 return -EFAULT;
5120
5121 vcpu->arch.apic->vapic_page = page;
5122 return 0;
5123}
5124
5125static void vapic_exit(struct kvm_vcpu *vcpu)
5126{
5127 struct kvm_lapic *apic = vcpu->arch.apic;
5128 int idx;
5129
5130 if (!apic || !apic->vapic_addr)
5131 return;
5132
5133 idx = srcu_read_lock(&vcpu->kvm->srcu);
5134 kvm_release_page_dirty(apic->vapic_page);
5135 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5136 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5137}
5138
5139static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5140{
5141 int max_irr, tpr;
5142
5143 if (!kvm_x86_ops->update_cr8_intercept)
5144 return;
5145
5146 if (!vcpu->arch.apic)
5147 return;
5148
5149 if (!vcpu->arch.apic->vapic_addr)
5150 max_irr = kvm_lapic_find_highest_irr(vcpu);
5151 else
5152 max_irr = -1;
5153
5154 if (max_irr != -1)
5155 max_irr >>= 4;
5156
5157 tpr = kvm_lapic_get_cr8(vcpu);
5158
5159 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5160}
5161
5162static void inject_pending_event(struct kvm_vcpu *vcpu)
5163{
	/* First re-deliver any event whose injection was already in flight. */
5165 if (vcpu->arch.exception.pending) {
5166 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5167 vcpu->arch.exception.has_error_code,
5168 vcpu->arch.exception.error_code);
5169 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5170 vcpu->arch.exception.has_error_code,
5171 vcpu->arch.exception.error_code,
5172 vcpu->arch.exception.reinject);
5173 return;
5174 }
5175
5176 if (vcpu->arch.nmi_injected) {
5177 kvm_x86_ops->set_nmi(vcpu);
5178 return;
5179 }
5180
5181 if (vcpu->arch.interrupt.pending) {
5182 kvm_x86_ops->set_irq(vcpu);
5183 return;
5184 }
5185
	/* Then inject a new NMI or external interrupt if one is allowed. */
5187 if (vcpu->arch.nmi_pending) {
5188 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5189 --vcpu->arch.nmi_pending;
5190 vcpu->arch.nmi_injected = true;
5191 kvm_x86_ops->set_nmi(vcpu);
5192 }
5193 } else if (kvm_cpu_has_interrupt(vcpu)) {
5194 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5195 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5196 false);
5197 kvm_x86_ops->set_irq(vcpu);
5198 }
5199 }
5200}
5201
5202static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
5203{
5204 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
5205 !vcpu->guest_xcr0_loaded) {
5206
5207 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
5208 vcpu->guest_xcr0_loaded = 1;
5209 }
5210}
5211
5212static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
5213{
5214 if (vcpu->guest_xcr0_loaded) {
5215 if (vcpu->arch.xcr0 != host_xcr0)
5216 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
5217 vcpu->guest_xcr0_loaded = 0;
5218 }
5219}
5220
5221static void process_nmi(struct kvm_vcpu *vcpu)
5222{
5223 unsigned limit = 2;
5224
	/*
	 * x86 allows one NMI in service plus at most one pending behind
	 * it: if an NMI is already being handled (or NMIs are masked),
	 * queue at most one more, otherwise up to two.
	 */
5230 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
5231 limit = 1;
5232
5233 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
5234 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
5235 kvm_make_request(KVM_REQ_EVENT, vcpu);
5236}
5237
5238static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5239{
5240 int r;
5241 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5242 vcpu->run->request_interrupt_window;
5243 bool req_immediate_exit = 0;
5244
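	/*
	 * Service deferred requests posted by other threads (MMU reload,
	 * clock updates, queued NMIs, ...) before injecting events and
	 * entering the guest.
	 */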
5245 if (vcpu->requests) {
5246 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5247 kvm_mmu_unload(vcpu);
5248 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5249 __kvm_migrate_timers(vcpu);
5250 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5251 r = kvm_guest_time_update(vcpu);
5252 if (unlikely(r))
5253 goto out;
5254 }
5255 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5256 kvm_mmu_sync_roots(vcpu);
5257 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5258 kvm_x86_ops->tlb_flush(vcpu);
5259 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5260 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5261 r = 0;
5262 goto out;
5263 }
5264 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5265 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5266 r = 0;
5267 goto out;
5268 }
5269 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5270 vcpu->fpu_active = 0;
5271 kvm_x86_ops->fpu_deactivate(vcpu);
5272 }
5273 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* The page is swapped out: synthesize a halt until it is ready. */
5275 vcpu->arch.apf.halted = true;
5276 r = 1;
5277 goto out;
5278 }
5279 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
5280 record_steal_time(vcpu);
5281 if (kvm_check_request(KVM_REQ_NMI, vcpu))
5282 process_nmi(vcpu);
5283 req_immediate_exit =
5284 kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
5285 if (kvm_check_request(KVM_REQ_PMU, vcpu))
5286 kvm_handle_pmu_event(vcpu);
5287 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5288 kvm_deliver_pmi(vcpu);
5289 }
5290
5291 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5292 inject_pending_event(vcpu);
		/* Open an NMI or interrupt window exit if more events are waiting. */
5295 if (vcpu->arch.nmi_pending)
5296 kvm_x86_ops->enable_nmi_window(vcpu);
5297 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
5298 kvm_x86_ops->enable_irq_window(vcpu);
5299
5300 if (kvm_lapic_enabled(vcpu)) {
5301 update_cr8_intercept(vcpu);
5302 kvm_lapic_sync_to_vapic(vcpu);
5303 }
5304 }
5305
5306 r = kvm_mmu_reload(vcpu);
5307 if (unlikely(r)) {
5308 goto cancel_injection;
5309 }
5310
5311 preempt_disable();
5312
5313 kvm_x86_ops->prepare_guest_switch(vcpu);
5314 if (vcpu->fpu_active)
5315 kvm_load_guest_fpu(vcpu);
5316 kvm_load_guest_xcr0(vcpu);
5317
5318 vcpu->mode = IN_GUEST_MODE;
5319
	/*
	 * ->mode must be visible before ->requests is re-checked below;
	 * this pairs with the barrier in the request/kick path.
	 */
5323 smp_mb();
5324
5325 local_irq_disable();
5326
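	/*
	 * With interrupts off, re-check for anything that arrived after
	 * ->mode was set; if so, undo the injection and retry the loop.
	 */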
5327 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
5328 || need_resched() || signal_pending(current)) {
5329 vcpu->mode = OUTSIDE_GUEST_MODE;
5330 smp_wmb();
5331 local_irq_enable();
5332 preempt_enable();
5333 r = 1;
5334 goto cancel_injection;
5335 }
5336
5337 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5338
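	/*
	 * A reschedule IPI sent to ourselves here stays pending until VM
	 * entry completes, forcing an immediate exit when one was
	 * requested.
	 */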
5339 if (req_immediate_exit)
5340 smp_send_reschedule(vcpu->cpu);
5341
5342 kvm_guest_enter();
5343
5344 if (unlikely(vcpu->arch.switch_db_regs)) {
5345 set_debugreg(0, 7);
5346 set_debugreg(vcpu->arch.eff_db[0], 0);
5347 set_debugreg(vcpu->arch.eff_db[1], 1);
5348 set_debugreg(vcpu->arch.eff_db[2], 2);
5349 set_debugreg(vcpu->arch.eff_db[3], 3);
5350 }
5351
5352 trace_kvm_entry(vcpu->vcpu_id);
5353 kvm_x86_ops->run(vcpu);
5354
	/*
	 * The debug registers may still hold guest values, but dr7 was
	 * disabled on exit so they are inert; only bother restoring the
	 * host state when host hardware breakpoints are active.
	 */
5362 if (hw_breakpoint_active())
5363 hw_breakpoint_restore();
5364
5365 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
5366
5367 vcpu->mode = OUTSIDE_GUEST_MODE;
5368 smp_wmb();
5369 local_irq_enable();
5370
5371 ++vcpu->stat.exits;
5372
	/*
	 * An instruction is needed between local_irq_enable() and
	 * kvm_guest_exit() so a pending timer interrupt is not delayed by
	 * the interrupt shadow; the ++exits above serves that purpose and
	 * barrier() keeps the compiler from reordering it away.
	 */
5379 barrier();
5380
5381 kvm_guest_exit();
5382
5383 preempt_enable();
5384
5385 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5386
	/* Attribute profiler hits to the guest RIP at exit time. */
5390 if (unlikely(prof_on == KVM_PROFILING)) {
5391 unsigned long rip = kvm_rip_read(vcpu);
5392 profile_hit(KVM_PROFILING, (void *)rip);
5393 }
5394
5395 if (unlikely(vcpu->arch.tsc_always_catchup))
5396 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5397
5398 if (vcpu->arch.apic_attention)
5399 kvm_lapic_sync_from_vapic(vcpu);
5400
5401 r = kvm_x86_ops->handle_exit(vcpu);
5402 return r;
5403
5404cancel_injection:
5405 kvm_x86_ops->cancel_injection(vcpu);
5406 if (unlikely(vcpu->arch.apic_attention))
5407 kvm_lapic_sync_from_vapic(vcpu);
5408out:
5409 return r;
5410}
5411
5412
5413static int __vcpu_run(struct kvm_vcpu *vcpu)
5414{
5415 int r;
5416 struct kvm *kvm = vcpu->kvm;
5417
5418 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
5419 pr_debug("vcpu %d received sipi with vector # %x\n",
5420 vcpu->vcpu_id, vcpu->arch.sipi_vector);
5421 kvm_lapic_reset(vcpu);
5422 r = kvm_arch_vcpu_reset(vcpu);
5423 if (r)
5424 return r;
5425 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5426 }
5427
5428 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5429 r = vapic_enter(vcpu);
5430 if (r) {
5431 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5432 return r;
5433 }
5434
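	/*
	 * Main run loop: keep entering the guest until an exit has to be
	 * handled by userspace (r <= 0), a signal arrives, or userspace
	 * asked for an interrupt window.
	 */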
5435 r = 1;
5436 while (r > 0) {
5437 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
5438 !vcpu->arch.apf.halted)
5439 r = vcpu_enter_guest(vcpu);
5440 else {
5441 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5442 kvm_vcpu_block(vcpu);
5443 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5444 if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
5445 {
5446 switch(vcpu->arch.mp_state) {
5447 case KVM_MP_STATE_HALTED:
5448 vcpu->arch.mp_state =
5449 KVM_MP_STATE_RUNNABLE;
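				/* fall through */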
5450 case KVM_MP_STATE_RUNNABLE:
5451 vcpu->arch.apf.halted = false;
5452 break;
5453 case KVM_MP_STATE_SIPI_RECEIVED:
5454 default:
5455 r = -EINTR;
5456 break;
5457 }
5458 }
5459 }
5460
5461 if (r <= 0)
5462 break;
5463
5464 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
5465 if (kvm_cpu_has_pending_timer(vcpu))
5466 kvm_inject_pending_timer_irqs(vcpu);
5467
5468 if (dm_request_for_irq_injection(vcpu)) {
5469 r = -EINTR;
5470 vcpu->run->exit_reason = KVM_EXIT_INTR;
5471 ++vcpu->stat.request_irq_exits;
5472 }
5473
5474 kvm_check_async_pf_completion(vcpu);
5475
5476 if (signal_pending(current)) {
5477 r = -EINTR;
5478 vcpu->run->exit_reason = KVM_EXIT_INTR;
5479 ++vcpu->stat.signal_exits;
5480 }
5481 if (need_resched()) {
5482 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5483 kvm_resched(vcpu);
5484 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5485 }
5486 }
5487
5488 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5489
5490 vapic_exit(vcpu);
5491
5492 return r;
5493}
5494
5495static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
5496{
5497 int r;
5498 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5499 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5500 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5501 if (r != EMULATE_DONE)
5502 return 0;
5503 return 1;
5504}
5505
5506static int complete_emulated_pio(struct kvm_vcpu *vcpu)
5507{
5508 BUG_ON(!vcpu->arch.pio.count);
5509
5510 return complete_emulated_io(vcpu);
5511}
5512
/*
 * Userspace completes MMIO one 8-byte fragment at a time.  For each
 * completion, copy back the data for the fragment that just finished (on
 * reads), advance to the next fragment, and either queue another
 * KVM_EXIT_MMIO or, once every fragment is done, resume the interrupted
 * instruction via complete_emulated_io().
 */
5531static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
5532{
5533 struct kvm_run *run = vcpu->run;
5534 struct kvm_mmio_fragment *frag;
5535 unsigned len;
5536
5537 BUG_ON(!vcpu->mmio_needed);
5538
	/* Complete the fragment that userspace just serviced. */
5540 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
5541 len = min(8u, frag->len);
5542 if (!vcpu->mmio_is_write)
5543 memcpy(frag->data, run->mmio.data, len);
5544
5545 if (frag->len <= 8) {
		/* Fragment fully consumed: move on to the next one. */
5547 frag++;
5548 vcpu->mmio_cur_fragment++;
5549 } else {
		/* More than 8 bytes remain: advance within this fragment. */
5551 frag->data += len;
5552 frag->gpa += len;
5553 frag->len -= len;
5554 }
5555
5556 if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
5557 vcpu->mmio_needed = 0;
5558 if (vcpu->mmio_is_write)
5559 return 1;
5560 vcpu->mmio_read_completed = 1;
5561 return complete_emulated_io(vcpu);
5562 }
5563
5564 run->exit_reason = KVM_EXIT_MMIO;
5565 run->mmio.phys_addr = frag->gpa;
5566 if (vcpu->mmio_is_write)
5567 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
5568 run->mmio.len = min(8u, frag->len);
5569 run->mmio.is_write = vcpu->mmio_is_write;
5570 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5571 return 0;
5572}
5573
5574
5575int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5576{
5577 int r;
5578 sigset_t sigsaved;
5579
5580 if (!tsk_used_math(current) && init_fpu(current))
5581 return -ENOMEM;
5582
5583 if (vcpu->sigset_active)
5584 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
5585
5586 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
5587 kvm_vcpu_block(vcpu);
5588 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
5589 r = -EAGAIN;
5590 goto out;
5591 }
5592
	/* Without an in-kernel APIC, re-sync the TPR from userspace. */
5594 if (!irqchip_in_kernel(vcpu->kvm)) {
5595 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
5596 r = -EINVAL;
5597 goto out;
5598 }
5599 }
5600
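	/*
	 * If the last exit left a partially completed PIO or MMIO
	 * operation, finish it (consuming the data userspace supplied)
	 * before running the guest again.
	 */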
5601 if (unlikely(vcpu->arch.complete_userspace_io)) {
5602 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
5603 vcpu->arch.complete_userspace_io = NULL;
5604 r = cui(vcpu);
5605 if (r <= 0)
5606 goto out;
5607 } else
5608 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
5609
5610 r = __vcpu_run(vcpu);
5611
5612out:
5613 post_kvm_run_save(vcpu);
5614 if (vcpu->sigset_active)
5615 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
5616
5617 return r;
5618}
5619
5620int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5621{
5622 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * get_regs() was called in the middle of instruction
		 * emulation: write the emulator's cached registers back to
		 * the vcpu first so userspace sees current values.
		 */
5630 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
5631 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5632 }
5633 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5634 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5635 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5636 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5637 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
5638 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
5639 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5640 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5641#ifdef CONFIG_X86_64
5642 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
5643 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
5644 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
5645 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
5646 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
5647 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
5648 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
5649 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
5650#endif
5651
5652 regs->rip = kvm_rip_read(vcpu);
5653 regs->rflags = kvm_get_rflags(vcpu);
5654
5655 return 0;
5656}
5657
5658int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5659{
5660 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
5661 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5662
5663 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
5664 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
5665 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
5666 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
5667 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
5668 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
5669 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
5670 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
5671#ifdef CONFIG_X86_64
5672 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
5673 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
5674 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
5675 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
5676 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
5677 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
5678 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
5679 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
5680#endif
5681
5682 kvm_rip_write(vcpu, regs->rip);
5683 kvm_set_rflags(vcpu, regs->rflags);
5684
5685 vcpu->arch.exception.pending = false;
5686
5687 kvm_make_request(KVM_REQ_EVENT, vcpu);
5688
5689 return 0;
5690}
5691
5692void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
5693{
5694 struct kvm_segment cs;
5695
5696 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
5697 *db = cs.db;
5698 *l = cs.l;
5699}
5700EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
5701
5702int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
5703 struct kvm_sregs *sregs)
5704{
5705 struct desc_ptr dt;
5706
5707 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5708 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5709 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5710 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5711 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5712 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5713
5714 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5715 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5716
5717 kvm_x86_ops->get_idt(vcpu, &dt);
5718 sregs->idt.limit = dt.size;
5719 sregs->idt.base = dt.address;
5720 kvm_x86_ops->get_gdt(vcpu, &dt);
5721 sregs->gdt.limit = dt.size;
5722 sregs->gdt.base = dt.address;
5723
5724 sregs->cr0 = kvm_read_cr0(vcpu);
5725 sregs->cr2 = vcpu->arch.cr2;
5726 sregs->cr3 = kvm_read_cr3(vcpu);
5727 sregs->cr4 = kvm_read_cr4(vcpu);
5728 sregs->cr8 = kvm_get_cr8(vcpu);
5729 sregs->efer = vcpu->arch.efer;
5730 sregs->apic_base = kvm_get_apic_base(vcpu);
5731
5732 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
5733
5734 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
5735 set_bit(vcpu->arch.interrupt.nr,
5736 (unsigned long *)sregs->interrupt_bitmap);
5737
5738 return 0;
5739}
5740
5741int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
5742 struct kvm_mp_state *mp_state)
5743{
5744 mp_state->mp_state = vcpu->arch.mp_state;
5745 return 0;
5746}
5747
5748int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
5749 struct kvm_mp_state *mp_state)
5750{
5751 vcpu->arch.mp_state = mp_state->mp_state;
5752 kvm_make_request(KVM_REQ_EVENT, vcpu);
5753 return 0;
5754}
5755
5756int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
5757 int reason, bool has_error_code, u32 error_code)
5758{
5759 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5760 int ret;
5761
5762 init_emulate_ctxt(vcpu);
5763
5764 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
5765 has_error_code, error_code);
5766
5767 if (ret)
5768 return EMULATE_FAIL;
5769
5770 kvm_rip_write(vcpu, ctxt->eip);
5771 kvm_set_rflags(vcpu, ctxt->eflags);
5772 kvm_make_request(KVM_REQ_EVENT, vcpu);
5773 return EMULATE_DONE;
5774}
5775EXPORT_SYMBOL_GPL(kvm_task_switch);
5776
5777int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5778 struct kvm_sregs *sregs)
5779{
5780 int mmu_reset_needed = 0;
5781 int pending_vec, max_bits, idx;
5782 struct desc_ptr dt;
5783
5784 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
5785 return -EINVAL;
5786
5787 dt.size = sregs->idt.limit;
5788 dt.address = sregs->idt.base;
5789 kvm_x86_ops->set_idt(vcpu, &dt);
5790 dt.size = sregs->gdt.limit;
5791 dt.address = sregs->gdt.base;
5792 kvm_x86_ops->set_gdt(vcpu, &dt);
5793
5794 vcpu->arch.cr2 = sregs->cr2;
5795 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
5796 vcpu->arch.cr3 = sregs->cr3;
5797 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
5798
5799 kvm_set_cr8(vcpu, sregs->cr8);
5800
5801 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
5802 kvm_x86_ops->set_efer(vcpu, sregs->efer);
5803 kvm_set_apic_base(vcpu, sregs->apic_base);
5804
5805 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
5806 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
5807 vcpu->arch.cr0 = sregs->cr0;
5808
5809 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5810 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5811 if (sregs->cr4 & X86_CR4_OSXSAVE)
5812 kvm_update_cpuid(vcpu);
5813
5814 idx = srcu_read_lock(&vcpu->kvm->srcu);
5815 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5816 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
5817 mmu_reset_needed = 1;
5818 }
5819 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5820
5821 if (mmu_reset_needed)
5822 kvm_mmu_reset_context(vcpu);
5823
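	/*
	 * Re-queue the pending external interrupt, if any, that get_sregs
	 * recorded in the interrupt bitmap.
	 */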
5824 max_bits = KVM_NR_INTERRUPTS;
5825 pending_vec = find_first_bit(
5826 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
5827 if (pending_vec < max_bits) {
5828 kvm_queue_interrupt(vcpu, pending_vec, false);
5829 pr_debug("Set back pending irq %d\n", pending_vec);
5830 }
5831
5832 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5833 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5834 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5835 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5836 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5837 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5838
5839 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5840 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5841
5842 update_cr8_intercept(vcpu);
5843
	/* Older userspace does not explicitly unhalt the BSP on reset. */
5845 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
5846 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
5847 !is_protmode(vcpu))
5848 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5849
5850 kvm_make_request(KVM_REQ_EVENT, vcpu);
5851
5852 return 0;
5853}
5854
5855int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5856 struct kvm_guest_debug *dbg)
5857{
5858 unsigned long rflags;
5859 int i, r;
5860
5861 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
5862 r = -EBUSY;
5863 if (vcpu->arch.exception.pending)
5864 goto out;
5865 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5866 kvm_queue_exception(vcpu, DB_VECTOR);
5867 else
5868 kvm_queue_exception(vcpu, BP_VECTOR);
5869 }
5870
	/*
	 * Read rflags while any previously injected single-step TF is
	 * still being filtered out.
	 */
5875 rflags = kvm_get_rflags(vcpu);
5876
5877 vcpu->guest_debug = dbg->control;
5878 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
5879 vcpu->guest_debug = 0;
5880
5881 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5882 for (i = 0; i < KVM_NR_DB_REGS; ++i)
5883 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
5884 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
5885 } else {
5886 for (i = 0; i < KVM_NR_DB_REGS; i++)
5887 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
5888 }
5889 kvm_update_dr7(vcpu);
5890
5891 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5892 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5893 get_segment_base(vcpu, VCPU_SREG_CS);
5894
	/*
	 * Write rflags back through kvm_set_rflags() so the trace flag is
	 * applied or removed to match the new debug state.
	 */
5899 kvm_set_rflags(vcpu, rflags);
5900
5901 kvm_x86_ops->update_db_bp_intercept(vcpu);
5902
5903 r = 0;
5904
5905out:
5906
5907 return r;
5908}
5909
/*
 * Translate a guest virtual address to a guest physical address.
 */
5913int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5914 struct kvm_translation *tr)
5915{
5916 unsigned long vaddr = tr->linear_address;
5917 gpa_t gpa;
5918 int idx;
5919
5920 idx = srcu_read_lock(&vcpu->kvm->srcu);
5921 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
5922 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5923 tr->physical_address = gpa;
5924 tr->valid = gpa != UNMAPPED_GVA;
5925 tr->writeable = 1;
5926 tr->usermode = 0;
5927
5928 return 0;
5929}
5930
5931int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5932{
5933 struct i387_fxsave_struct *fxsave =
5934 &vcpu->arch.guest_fpu.state->fxsave;
5935
5936 memcpy(fpu->fpr, fxsave->st_space, 128);
5937 fpu->fcw = fxsave->cwd;
5938 fpu->fsw = fxsave->swd;
5939 fpu->ftwx = fxsave->twd;
5940 fpu->last_opcode = fxsave->fop;
5941 fpu->last_ip = fxsave->rip;
5942 fpu->last_dp = fxsave->rdp;
5943 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
5944
5945 return 0;
5946}
5947
5948int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5949{
5950 struct i387_fxsave_struct *fxsave =
5951 &vcpu->arch.guest_fpu.state->fxsave;
5952
5953 memcpy(fxsave->st_space, fpu->fpr, 128);
5954 fxsave->cwd = fpu->fcw;
5955 fxsave->swd = fpu->fsw;
5956 fxsave->twd = fpu->ftwx;
5957 fxsave->fop = fpu->last_opcode;
5958 fxsave->rip = fpu->last_ip;
5959 fxsave->rdp = fpu->last_dp;
5960 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
5961
5962 return 0;
5963}
5964
5965int fx_init(struct kvm_vcpu *vcpu)
5966{
5967 int err;
5968
5969 err = fpu_alloc(&vcpu->arch.guest_fpu);
5970 if (err)
5971 return err;
5972
5973 fpu_finit(&vcpu->arch.guest_fpu);
5974
	/*
	 * Start with only x87 state enabled in the guest's XCR0; the guest
	 * enables further features itself via XSETBV.
	 */
5978 vcpu->arch.xcr0 = XSTATE_FP;
5979
5980 vcpu->arch.cr0 |= X86_CR0_ET;
5981
5982 return 0;
5983}
5984EXPORT_SYMBOL_GPL(fx_init);
5985
5986static void fx_free(struct kvm_vcpu *vcpu)
5987{
5988 fpu_free(&vcpu->arch.guest_fpu);
5989}
5990
5991void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
5992{
5993 if (vcpu->guest_fpu_loaded)
5994 return;
5995
	/*
	 * Restore the guest FPU with the host XCR0 in place, assuming the
	 * host enables every available state component; the guest's XCR0
	 * is switched in later by kvm_load_guest_xcr0().
	 */
6001 kvm_put_guest_xcr0(vcpu);
6002 vcpu->guest_fpu_loaded = 1;
6003 __kernel_fpu_begin();
6004 fpu_restore_checking(&vcpu->arch.guest_fpu);
6005 trace_kvm_fpu(1);
6006}
6007
6008void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6009{
6010 kvm_put_guest_xcr0(vcpu);
6011
6012 if (!vcpu->guest_fpu_loaded)
6013 return;
6014
6015 vcpu->guest_fpu_loaded = 0;
6016 fpu_save_init(&vcpu->arch.guest_fpu);
6017 __kernel_fpu_end();
6018 ++vcpu->stat.fpu_reload;
6019 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6020 trace_kvm_fpu(0);
6021}
6022
6023void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6024{
6025 kvmclock_reset(vcpu);
6026
6027 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6028 fx_free(vcpu);
6029 kvm_x86_ops->vcpu_free(vcpu);
6030}
6031
6032struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6033 unsigned int id)
6034{
6035 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6036 printk_once(KERN_WARNING
6037 "kvm: SMP vm created on host with unstable TSC; "
6038 "guest TSC will not be reliable\n");
6039 return kvm_x86_ops->vcpu_create(kvm, id);
6040}
6041
6042int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6043{
6044 int r;
6045
6046 vcpu->arch.mtrr_state.have_fixed = 1;
6047 r = vcpu_load(vcpu);
6048 if (r)
6049 return r;
6050 r = kvm_arch_vcpu_reset(vcpu);
6051 if (r == 0)
6052 r = kvm_mmu_setup(vcpu);
6053 vcpu_put(vcpu);
6054
6055 return r;
6056}
6057
6058void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6059{
6060 int r;
6061 vcpu->arch.apf.msr_val = 0;
6062
6063 r = vcpu_load(vcpu);
6064 BUG_ON(r);
6065 kvm_mmu_unload(vcpu);
6066 vcpu_put(vcpu);
6067
6068 fx_free(vcpu);
6069 kvm_x86_ops->vcpu_free(vcpu);
6070}
6071
6072int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
6073{
6074 atomic_set(&vcpu->arch.nmi_queued, 0);
6075 vcpu->arch.nmi_pending = 0;
6076 vcpu->arch.nmi_injected = false;
6077
6078 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6079 vcpu->arch.dr6 = DR6_FIXED_1;
6080 vcpu->arch.dr7 = DR7_FIXED_1;
6081 kvm_update_dr7(vcpu);
6082
6083 kvm_make_request(KVM_REQ_EVENT, vcpu);
6084 vcpu->arch.apf.msr_val = 0;
6085 vcpu->arch.st.msr_val = 0;
6086
6087 kvmclock_reset(vcpu);
6088
6089 kvm_clear_async_pf_completion_queue(vcpu);
6090 kvm_async_pf_hash_reset(vcpu);
6091 vcpu->arch.apf.halted = false;
6092
6093 kvm_pmu_reset(vcpu);
6094
6095 return kvm_x86_ops->vcpu_reset(vcpu);
6096}
6097
6098int kvm_arch_hardware_enable(void *garbage)
6099{
6100 struct kvm *kvm;
6101 struct kvm_vcpu *vcpu;
6102 int i;
6103 int ret;
6104 u64 local_tsc;
6105 u64 max_tsc = 0;
6106 bool stable, backwards_tsc = false;
6107
6108 kvm_shared_msr_cpu_online();
6109 ret = kvm_x86_ops->hardware_enable(garbage);
6110 if (ret != 0)
6111 return ret;
6112
6113 local_tsc = native_read_tsc();
6114 stable = !check_tsc_unstable();
6115 list_for_each_entry(kvm, &vm_list, vm_list) {
6116 kvm_for_each_vcpu(i, vcpu, kvm) {
6117 if (!stable && vcpu->cpu == smp_processor_id())
6118 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6119 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6120 backwards_tsc = true;
6121 if (vcpu->arch.last_host_tsc > max_tsc)
6122 max_tsc = vcpu->arch.last_host_tsc;
6123 }
6124 }
6125 }
6126
	/*
	 * A host TSC that appears to have gone backwards here almost
	 * always means the platform reset it across suspend/resume while
	 * the vcpus kept their pre-suspend last_host_tsc values.
	 * Compensate by bumping every vcpu's TSC offset by the largest
	 * observed delta so no guest ever sees its TSC move backwards.
	 */
6165 if (backwards_tsc) {
6166 u64 delta_cyc = max_tsc - local_tsc;
6167 list_for_each_entry(kvm, &vm_list, vm_list) {
6168 kvm_for_each_vcpu(i, vcpu, kvm) {
6169 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6170 vcpu->arch.last_host_tsc = local_tsc;
6171 }
			/*
			 * The per-VM TSC write tracking now refers to the
			 * pre-suspend clock; reset it so the offset-matching
			 * logic does not pair new writes with stale values.
			 */
6179 kvm->arch.last_tsc_nsec = 0;
6180 kvm->arch.last_tsc_write = 0;
6181 }
6182
6183 }
6184 return 0;
6185}
6186
6187void kvm_arch_hardware_disable(void *garbage)
6188{
6189 kvm_x86_ops->hardware_disable(garbage);
6190 drop_user_return_notifiers(garbage);
6191}
6192
6193int kvm_arch_hardware_setup(void)
6194{
6195 return kvm_x86_ops->hardware_setup();
6196}
6197
6198void kvm_arch_hardware_unsetup(void)
6199{
6200 kvm_x86_ops->hardware_unsetup();
6201}
6202
6203void kvm_arch_check_processor_compat(void *rtn)
6204{
6205 kvm_x86_ops->check_processor_compatibility(rtn);
6206}
6207
6208bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6209{
6210 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6211}
6212
6213struct static_key kvm_no_apic_vcpu __read_mostly;
6214
6215int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6216{
6217 struct page *page;
6218 struct kvm *kvm;
6219 int r;
6220
6221 BUG_ON(vcpu->kvm == NULL);
6222 kvm = vcpu->kvm;
6223
6224 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
6225 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
6226 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6227 else
6228 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
6229
6230 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6231 if (!page) {
6232 r = -ENOMEM;
6233 goto fail;
6234 }
6235 vcpu->arch.pio_data = page_address(page);
6236
6237 kvm_set_tsc_khz(vcpu, max_tsc_khz);
6238
6239 r = kvm_mmu_create(vcpu);
6240 if (r < 0)
6241 goto fail_free_pio_data;
6242
6243 if (irqchip_in_kernel(kvm)) {
6244 r = kvm_create_lapic(vcpu);
6245 if (r < 0)
6246 goto fail_mmu_destroy;
6247 } else
6248 static_key_slow_inc(&kvm_no_apic_vcpu);
6249
6250 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6251 GFP_KERNEL);
6252 if (!vcpu->arch.mce_banks) {
6253 r = -ENOMEM;
6254 goto fail_free_lapic;
6255 }
6256 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
6257
6258 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
6259 goto fail_free_mce_banks;
6260
6261 kvm_async_pf_hash_reset(vcpu);
6262 kvm_pmu_init(vcpu);
6263
6264 return 0;
6265fail_free_mce_banks:
6266 kfree(vcpu->arch.mce_banks);
6267fail_free_lapic:
6268 kvm_free_lapic(vcpu);
6269fail_mmu_destroy:
6270 kvm_mmu_destroy(vcpu);
6271fail_free_pio_data:
6272 free_page((unsigned long)vcpu->arch.pio_data);
6273fail:
6274 return r;
6275}
6276
6277void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
6278{
6279 int idx;
6280
6281 kvm_pmu_destroy(vcpu);
6282 kfree(vcpu->arch.mce_banks);
6283 kvm_free_lapic(vcpu);
6284 idx = srcu_read_lock(&vcpu->kvm->srcu);
6285 kvm_mmu_destroy(vcpu);
6286 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6287 free_page((unsigned long)vcpu->arch.pio_data);
6288 if (!irqchip_in_kernel(vcpu->kvm))
6289 static_key_slow_dec(&kvm_no_apic_vcpu);
6290}
6291
6292int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6293{
6294 if (type)
6295 return -EINVAL;
6296
6297 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6298 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
	/* Reserve bit 0 of irq_sources_bitmap for the userspace irq source. */
6301 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
6302
6303 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
6304 &kvm->arch.irq_sources_bitmap);
6305
6306 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
6307 mutex_init(&kvm->arch.apic_map_lock);
6308
6309 return 0;
6310}
6311
6312static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
6313{
6314 int r;
6315 r = vcpu_load(vcpu);
6316 BUG_ON(r);
6317 kvm_mmu_unload(vcpu);
6318 vcpu_put(vcpu);
6319}
6320
6321static void kvm_free_vcpus(struct kvm *kvm)
6322{
6323 unsigned int i;
6324 struct kvm_vcpu *vcpu;
6325
	/*
	 * Unload each vcpu's MMU and flush its async page faults before
	 * freeing the vcpus themselves.
	 */
6329 kvm_for_each_vcpu(i, vcpu, kvm) {
6330 kvm_clear_async_pf_completion_queue(vcpu);
6331 kvm_unload_vcpu_mmu(vcpu);
6332 }
6333 kvm_for_each_vcpu(i, vcpu, kvm)
6334 kvm_arch_vcpu_free(vcpu);
6335
6336 mutex_lock(&kvm->lock);
6337 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
6338 kvm->vcpus[i] = NULL;
6339
6340 atomic_set(&kvm->online_vcpus, 0);
6341 mutex_unlock(&kvm->lock);
6342}
6343
6344void kvm_arch_sync_events(struct kvm *kvm)
6345{
6346 kvm_free_all_assigned_devices(kvm);
6347 kvm_free_pit(kvm);
6348}
6349
6350void kvm_arch_destroy_vm(struct kvm *kvm)
6351{
6352 kvm_iommu_unmap_guest(kvm);
6353 kfree(kvm->arch.vpic);
6354 kfree(kvm->arch.vioapic);
6355 kvm_free_vcpus(kvm);
6356 if (kvm->arch.apic_access_page)
6357 put_page(kvm->arch.apic_access_page);
6358 if (kvm->arch.ept_identity_pagetable)
6359 put_page(kvm->arch.ept_identity_pagetable);
6360 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
6361}
6362
6363void kvm_arch_free_memslot(struct kvm_memory_slot *free,
6364 struct kvm_memory_slot *dont)
6365{
6366 int i;
6367
6368 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6369 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
6370 kvm_kvfree(free->arch.rmap[i]);
6371 free->arch.rmap[i] = NULL;
6372 }
6373 if (i == 0)
6374 continue;
6375
6376 if (!dont || free->arch.lpage_info[i - 1] !=
6377 dont->arch.lpage_info[i - 1]) {
6378 kvm_kvfree(free->arch.lpage_info[i - 1]);
6379 free->arch.lpage_info[i - 1] = NULL;
6380 }
6381 }
6382}
6383
6384int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
6385{
6386 int i;
6387
6388 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6389 unsigned long ugfn;
6390 int lpages;
6391 int level = i + 1;
6392
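		/*
		 * Number of level-sized chunks needed to cover the slot;
		 * each gets an rmap entry and, for the large-page levels,
		 * an lpage_info entry.
		 */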
6393 lpages = gfn_to_index(slot->base_gfn + npages - 1,
6394 slot->base_gfn, level) + 1;
6395
6396 slot->arch.rmap[i] =
6397 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
6398 if (!slot->arch.rmap[i])
6399 goto out_free;
6400 if (i == 0)
6401 continue;
6402
6403 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
6404 sizeof(*slot->arch.lpage_info[i - 1]));
6405 if (!slot->arch.lpage_info[i - 1])
6406 goto out_free;
6407
6408 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
6409 slot->arch.lpage_info[i - 1][0].write_count = 1;
6410 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
6411 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
6412 ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * If the guest-physical and userspace addresses are not
		 * aligned with respect to each other, or large pages are
		 * globally disabled, mark every large page in the slot so
		 * that only 4K mappings are used.
		 */
6418 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
6419 !kvm_largepages_enabled()) {
6420 unsigned long j;
6421
6422 for (j = 0; j < lpages; ++j)
6423 slot->arch.lpage_info[i - 1][j].write_count = 1;
6424 }
6425 }
6426
6427 return 0;
6428
6429out_free:
6430 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6431 kvm_kvfree(slot->arch.rmap[i]);
6432 slot->arch.rmap[i] = NULL;
6433 if (i == 0)
6434 continue;
6435
6436 kvm_kvfree(slot->arch.lpage_info[i - 1]);
6437 slot->arch.lpage_info[i - 1] = NULL;
6438 }
6439 return -ENOMEM;
6440}
6441
6442int kvm_arch_prepare_memory_region(struct kvm *kvm,
6443 struct kvm_memory_slot *memslot,
6444 struct kvm_memory_slot old,
6445 struct kvm_userspace_memory_region *mem,
6446 int user_alloc)
6447{
6448 int npages = memslot->npages;
6449 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
	/* Slots above KVM_MEMORY_SLOTS are internal (e.g. TSS, APIC access page). */
6452 if (memslot->id >= KVM_MEMORY_SLOTS)
6453 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6454
	/*
	 * For compatibility with old userspace, x86 still handles the
	 * !user_alloc case by mmap()ing anonymous memory itself.
	 */
6458 if (!user_alloc) {
6459 if (npages && !old.npages) {
6460 unsigned long userspace_addr;
6461
6462 userspace_addr = vm_mmap(NULL, 0,
6463 npages * PAGE_SIZE,
6464 PROT_READ | PROT_WRITE,
6465 map_flags,
6466 0);
6467
6468 if (IS_ERR((void *)userspace_addr))
6469 return PTR_ERR((void *)userspace_addr);
6470
6471 memslot->userspace_addr = userspace_addr;
6472 }
6473 }
6474
6475
6476 return 0;
6477}
6478
6479void kvm_arch_commit_memory_region(struct kvm *kvm,
6480 struct kvm_userspace_memory_region *mem,
6481 struct kvm_memory_slot old,
6482 int user_alloc)
6483{
6484
6485 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6486
6487 if (!user_alloc && !old.user_alloc && old.npages && !npages) {
6488 int ret;
6489
6490 ret = vm_munmap(old.userspace_addr,
6491 old.npages * PAGE_SIZE);
6492 if (ret < 0)
6493 printk(KERN_WARNING
6494 "kvm_vm_ioctl_set_memory_region: "
6495 "failed to munmap memory\n");
6496 }
6497
6498 if (!kvm->arch.n_requested_mmu_pages)
6499 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6500
6501 spin_lock(&kvm->mmu_lock);
6502 if (nr_mmu_pages)
6503 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6504 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6505 spin_unlock(&kvm->mmu_lock);
	/*
	 * If the slot's guest-physical base moved, existing shadow pages
	 * and cached mmio sptes may reference the wrong gfns: zap
	 * everything and make remote vcpus reload their MMUs.
	 */
6510 if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
6511 kvm_mmu_zap_all(kvm);
6512 kvm_reload_remote_mmus(kvm);
6513 }
6514}
6515
6516void kvm_arch_flush_shadow_all(struct kvm *kvm)
6517{
6518 kvm_mmu_zap_all(kvm);
6519 kvm_reload_remote_mmus(kvm);
6520}
6521
6522void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
6523 struct kvm_memory_slot *slot)
6524{
6525 kvm_arch_flush_shadow_all(kvm);
6526}
6527
6528int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6529{
6530 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6531 !vcpu->arch.apf.halted)
6532 || !list_empty_careful(&vcpu->async_pf.done)
6533 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6534 || atomic_read(&vcpu->arch.nmi_queued) ||
6535 (kvm_arch_interrupt_allowed(vcpu) &&
6536 kvm_cpu_has_interrupt(vcpu));
6537}
6538
6539int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
6540{
6541 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
6542}
6543
6544int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
6545{
6546 return kvm_x86_ops->interrupt_allowed(vcpu);
6547}
6548
6549bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
6550{
6551 unsigned long current_rip = kvm_rip_read(vcpu) +
6552 get_segment_base(vcpu, VCPU_SREG_CS);
6553
6554 return current_rip == linear_rip;
6555}
6556EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
6557
6558unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
6559{
6560 unsigned long rflags;
6561
6562 rflags = kvm_x86_ops->get_rflags(vcpu);
6563 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6564 rflags &= ~X86_EFLAGS_TF;
6565 return rflags;
6566}
6567EXPORT_SYMBOL_GPL(kvm_get_rflags);
6568
6569void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
6570{
6571 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
6572 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
6573 rflags |= X86_EFLAGS_TF;
6574 kvm_x86_ops->set_rflags(vcpu, rflags);
6575 kvm_make_request(KVM_REQ_EVENT, vcpu);
6576}
6577EXPORT_SYMBOL_GPL(kvm_set_rflags);
6578
6579void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
6580{
6581 int r;
6582
6583 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
6584 is_error_page(work->page))
6585 return;
6586
6587 r = kvm_mmu_reload(vcpu);
6588 if (unlikely(r))
6589 return;
6590
6591 if (!vcpu->arch.mmu.direct_map &&
6592 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
6593 return;
6594
6595 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
6596}
6597
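/*
 * Outstanding async-page-fault gfns are kept in a small open-addressed hash
 * table with linear probing; a value of ~0 marks a free slot.
 */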
6598static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
6599{
6600 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
6601}
6602
6603static inline u32 kvm_async_pf_next_probe(u32 key)
6604{
6605 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
6606}
6607
6608static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6609{
6610 u32 key = kvm_async_pf_hash_fn(gfn);
6611
6612 while (vcpu->arch.apf.gfns[key] != ~0)
6613 key = kvm_async_pf_next_probe(key);
6614
6615 vcpu->arch.apf.gfns[key] = gfn;
6616}
6617
6618static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
6619{
6620 int i;
6621 u32 key = kvm_async_pf_hash_fn(gfn);
6622
6623 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
6624 (vcpu->arch.apf.gfns[key] != gfn &&
6625 vcpu->arch.apf.gfns[key] != ~0); i++)
6626 key = kvm_async_pf_next_probe(key);
6627
6628 return key;
6629}
6630
6631bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6632{
6633 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
6634}
6635
6636static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6637{
6638 u32 i, j, k;
6639
6640 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
6641 while (true) {
6642 vcpu->arch.apf.gfns[i] = ~0;
6643 do {
6644 j = kvm_async_pf_next_probe(j);
6645 if (vcpu->arch.apf.gfns[j] == ~0)
6646 return;
6647 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * Keep probing while j's home slot k lies cyclically
			 * in (i, j]; the first entry whose home slot falls
			 * outside that range can safely be moved back into
			 * the hole at i.
			 */
6653 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
6654 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
6655 i = j;
6656 }
6657}
6658
6659static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
6660{
6661
6662 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
6663 sizeof(val));
6664}
6665
6666void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
6667 struct kvm_async_pf *work)
6668{
6669 struct x86_exception fault;
6670
6671 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
6672 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
6673
6674 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
6675 (vcpu->arch.apf.send_user_only &&
6676 kvm_x86_ops->get_cpl(vcpu) == 0))
6677 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
6678 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
6679 fault.vector = PF_VECTOR;
6680 fault.error_code_valid = true;
6681 fault.error_code = 0;
6682 fault.nested_page_fault = false;
6683 fault.address = work->arch.token;
6684 kvm_inject_page_fault(vcpu, &fault);
6685 }
6686}
6687
6688void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
6689 struct kvm_async_pf *work)
6690{
6691 struct x86_exception fault;
6692
6693 trace_kvm_async_pf_ready(work->arch.token, work->gva);
6694 if (is_error_page(work->page))
6695 work->arch.token = ~0;
6696 else
6697 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
6698
6699 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
6700 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
6701 fault.vector = PF_VECTOR;
6702 fault.error_code_valid = true;
6703 fault.error_code = 0;
6704 fault.nested_page_fault = false;
6705 fault.address = work->arch.token;
6706 kvm_inject_page_fault(vcpu, &fault);
6707 }
6708 vcpu->arch.apf.halted = false;
6709 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6710}
6711
6712bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
6713{
6714 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
6715 return true;
6716 else
6717 return !kvm_event_needs_reinjection(vcpu) &&
6718 kvm_x86_ops->interrupt_allowed(vcpu);
6719}
6720
6721EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
6722EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
6723EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
6724EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
6725EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
6726EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
6727EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
6728EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
6729EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
6730EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
6731EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
6732EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
6733