#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

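/*
 * EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */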
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
					     struct kvm_cpuid_entry2 __user *entries);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

int ignore_msrs = 0;
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);

bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);

#define KVM_NR_SHARED_MSRS 16

struct kvm_shared_msrs_global {
	int nr;
	u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_shared_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "efer_reload", VCPU_STAT(efer_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ NULL }
};

u64 __read_mostly host_xcr0;

int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
	int i;
	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
		vcpu->arch.apf.gfns[i] = ~0;
}

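/*
 * Runs from the user-return notifier: restore the host's values of any
 * shared MSRs the guest may have modified before we go back to userspace.
 */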
static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
	struct kvm_shared_msrs *locals
		= container_of(urn, struct kvm_shared_msrs, urn);
	struct kvm_shared_msr_values *values;

	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
		values = &locals->values[slot];
		if (values->host != values->curr) {
			wrmsrl(shared_msrs_global.msrs[slot], values->host);
			values->curr = values->host;
		}
	}
	locals->registered = false;
	user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
{
	struct kvm_shared_msrs *smsr;
	u64 value;

	smsr = &__get_cpu_var(shared_msrs);
	/* only read here; nobody should modify it at this time,
	 * so no lock is needed */
	if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!");
		return;
	}
	rdmsrl_safe(msr, &value);
	smsr->values[slot].host = value;
	smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
	shared_msrs_global.msrs[slot] = msr;
	/* make sure the updated table is visible before other CPUs read it */
	smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
	unsigned i;

	for (i = 0; i < shared_msrs_global.nr; ++i)
		shared_msr_update(i, shared_msrs_global.msrs[i]);
}

void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);

	if (((value ^ smsr->values[slot].curr) & mask) == 0)
		return;
	smsr->values[slot].curr = value;
	wrmsrl(shared_msrs_global.msrs[slot], value);
	if (!smsr->registered) {
		smsr->urn.on_user_return = kvm_on_user_return;
		user_return_notifier_register(&smsr->urn);
		smsr->registered = true;
	}
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void *ignore)
{
	struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	/* apic_base is kept current whether the APIC is in-kernel or not */
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_base(vcpu, data);
	else
		vcpu->arch.apic_base = data;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

#define EXCPT_BENIGN		0
#define EXCPT_CONTRIBUTORY	1
#define EXCPT_PF		2

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending) {
	queue:
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.reinject = reinject;
		return;
	}

	/* an exception is already pending: decide how the two combine */
	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* generate double fault per SDM Table 5-5 */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
	} else
		/* replace previous exception with a new one in a hope
		   that instruction re-execution will regenerate lost
		   exception */
		goto queue;
}
322
323void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
324{
325 kvm_multiple_exception(vcpu, nr, false, 0, false);
326}
327EXPORT_SYMBOL_GPL(kvm_queue_exception);
328
329void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
330{
331 kvm_multiple_exception(vcpu, nr, false, 0, true);
332}
333EXPORT_SYMBOL_GPL(kvm_requeue_exception);
334
335void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
336{
337 if (err)
338 kvm_inject_gp(vcpu, 0);
339 else
340 kvm_x86_ops->skip_emulated_instruction(vcpu);
341}
342EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
343
344void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
345{
346 ++vcpu->stat.pf_guest;
347 vcpu->arch.cr2 = fault->address;
348 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
349}
350
351void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
352{
353 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
354 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
355 else
356 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
357}
358
359void kvm_inject_nmi(struct kvm_vcpu *vcpu)
360{
361 kvm_make_request(KVM_REQ_EVENT, vcpu);
362 vcpu->arch.nmi_pending = 1;
363}
364EXPORT_SYMBOL_GPL(kvm_inject_nmi);
365
366void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
367{
368 kvm_multiple_exception(vcpu, nr, true, error_code, false);
369}
370EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
371
372void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
373{
374 kvm_multiple_exception(vcpu, nr, true, error_code, true);
375}
376EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
377
378
379
380
381
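/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */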
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

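/*
 * This function will be used to read from the physical memory of the currently
 * running guest. The difference to kvm_read_guest_page is that this function
 * can read from guest physical or from the guest's guest physical memory.
 */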
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
			       void *data, int offset, int len, u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

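/*
 * Load the pae pdptrs.  Return true if they are all valid.
 */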
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK|PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if (is_present_gpte(pdpte[i]) &&
		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_dirty);
out:
	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
	bool changed = true;
	int offset;
	gfn_t gfn;
	int r;

	if (is_long_mode(vcpu) || !is_pae(vcpu))
		return false;

	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		return true;

	gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
	offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
				       PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (r < 0)
		goto out;
	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:
	return changed;
}

int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
				    X86_CR0_CD | X86_CR0_NW;

	cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return 1;
#endif

	cr0 &= ~CR0_RESERVED_BITS;

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
		return 1;

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
		return 1;

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu))
				return 1;
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l)
				return 1;
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
						 kvm_read_cr3(vcpu)))
			return 1;
	}

	kvm_x86_ops->set_cr0(vcpu, cr0);

	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
	}

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	xcr0 = xcr;
	if (kvm_x86_ops->get_cpl(vcpu) != 0)
		return 1;
	if (!(xcr0 & XSTATE_FP))
		return 1;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return 1;
	if (xcr0 & ~host_xcr0)
		return 1;
	vcpu->arch.xcr0 = xcr0;
	vcpu->guest_xcr0_loaded = 0;
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (__kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}

static void update_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	if (!best)
		return;

	/* Update OSXSAVE bit */
	if (cpu_has_xsave && best->function == 0x1) {
		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
			best->ecx |= bit(X86_FEATURE_OSXSAVE);
	}
}

int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;

	if (cr4 & CR4_RESERVED_BITS)
		return 1;

	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
		   && ((cr4 ^ old_cr4) & pdptr_bits)
		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
				   kvm_read_cr3(vcpu)))
		return 1;

	if (cr4 & X86_CR4_VMXE)
		return 1;

	kvm_x86_ops->set_cr4(vcpu, cr4);

	if ((cr4 ^ old_cr4) & pdptr_bits)
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
		update_cpuid(vcpu);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
		kvm_mmu_sync_roots(vcpu);
		kvm_mmu_flush_tlb(vcpu);
		return 0;
	}

	if (is_long_mode(vcpu)) {
		if (cr3 & CR3_L_MODE_RESERVED_BITS)
			return 1;
	} else {
		if (is_pae(vcpu)) {
			if (cr3 & CR3_PAE_RESERVED_BITS)
				return 1;
			if (is_paging(vcpu) &&
			    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
				return 1;
		}
		/*
		 * We don't check reserved bits in nonpae mode, because
		 * this isn't enforced, and VMware depends on this.
		 */
	}

	/*
	 * Does the new cr3 value map to physical memory? (Note, we
	 * catch an invalid cr3 even in real-mode, because it would
	 * cause trouble later on when we turn on paging anyway.)
	 *
	 * A real CPU would silently accept an invalid cr3 and would
	 * attempt to use it - with largely undefined (and often hard
	 * to debug) behavior on the guest side.
	 */
	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
		return 1;
	vcpu->arch.cr3 = cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
	vcpu->arch.mmu.new_cr3(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS)
		return 1;
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	switch (dr) {
	case 0 ... 3:
		vcpu->arch.db[dr] = val;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = val;
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	case 6:
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	default: /* 7 */
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
			kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
			vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
		}
		break;
	}

	return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	int res;

	res = __kvm_set_dr(vcpu, dr, val);
	if (res > 0)
		kvm_queue_exception(vcpu, UD_VECTOR);
	else if (res < 0)
		kvm_inject_gp(vcpu, 0);

	return res;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	switch (dr) {
	case 0 ... 3:
		*val = vcpu->arch.db[dr];
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	case 6:
		*val = vcpu->arch.dr6;
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	default: /* 7 */
		*val = vcpu->arch.dr7;
		break;
	}

	return 0;
}

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	if (_kvm_get_dr(vcpu, dr, val)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

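/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu. This capabilities test skips MSRs that are
 * kvm-specific. Those are put in the beginning of the list.
 */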
#define KVM_SAVE_MSRS_BEGIN	8
static u32 msrs_to_save[] = {
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};

static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
	MSR_IA32_MCG_STATUS,
	MSR_IA32_MCG_CTL,
};

static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	u64 old_efer = vcpu->arch.efer;

	if (efer & efer_reserved_bits)
		return 1;

	if (is_paging(vcpu)
	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
		return 1;

	if (efer & EFER_FFXSR) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
			return 1;
	}

	if (efer & EFER_SVME) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
			return 1;
	}

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.efer & EFER_LMA;

	kvm_x86_ops->set_efer(vcpu, efer);

	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;

	/* Update reserved bits */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);

	return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

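/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */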
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
}

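/*
 * Adapt set_msr() to msr_io()'s calling convention
 */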
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec boot;

	if (!wall_clock)
		return;

	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
	if (r)
		return;

	if (version & 1)
		++version;  /* first time write, random junk */

	++version;

	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));

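	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 */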
	getboottime(&boot);

	wc.sec = boot.tv_sec;
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	uint32_t quotient, remainder;

	/* Don't try to replace with do_div(), this one calculates
	 * "(dividend << 32) / divisor" */
	__asm__ ( "divl %4"
		  : "=a" (quotient), "=d" (remainder)
		  : "0" (0), "1" (dividend), "r" (divisor) );
	return quotient;
}

static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
			       s8 *pshift, u32 *pmultiplier)
{
	uint64_t scaled64;
	int32_t shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = base_khz * 1000LL;
	scaled64 = scaled_khz * 1000LL;
	while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
		if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
			scaled64 >>= 1;
		else
			tps32 <<= 1;
		shift++;
	}

	*pshift = shift;
	*pmultiplier = div_frac(scaled64, tps32);

	pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
}

static inline u64 get_kernel_ns(void)
{
	struct timespec ts;

	WARN_ON(preemptible());
	ktime_get_ts(&ts);
	monotonic_to_bootbased(&ts);
	return timespec_to_ns(&ts);
}

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;

static inline int kvm_tsc_changes_freq(void)
{
	int cpu = get_cpu();
	int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
		  cpufreq_quick_get(cpu) != 0;
	put_cpu();
	return ret;
}

static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.virtual_tsc_khz)
		return vcpu->arch.virtual_tsc_khz;
	else
		return __this_cpu_read(cpu_tsc_khz);
}

static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
	u64 ret;

	WARN_ON(preemptible());
	if (kvm_tsc_changes_freq())
		printk_once(KERN_WARNING
		 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
	ret = nsec * vcpu_tsc_khz(vcpu);
	do_div(ret, USEC_PER_SEC);
	return ret;
}

static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
	/* Compute a scale to convert nanoseconds in TSC cycles */
	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
			   &vcpu->arch.tsc_catchup_shift,
			   &vcpu->arch.tsc_catchup_mult);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
				      vcpu->arch.tsc_catchup_mult,
				      vcpu->arch.tsc_catchup_shift);
	tsc += vcpu->arch.last_tsc_write;
	return tsc;
}

void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u64 offset, ns, elapsed;
	unsigned long flags;
	s64 sdiff;

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
	ns = get_kernel_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;
	sdiff = data - kvm->arch.last_tsc_write;
	if (sdiff < 0)
		sdiff = -sdiff;

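	/*
	 * Special case: a close write to the TSC within 5 seconds of
	 * another CPU is interpreted as an attempt to synchronize.
	 * The 5 seconds is to accommodate host load / swapping as
	 * well as any reset of TSC during the boot process.
	 *
	 * In that case, for a reliable TSC, we can match TSC offsets,
	 * or make a best guess using the elapsed value.
	 */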
	if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
	    elapsed < 5ULL * NSEC_PER_SEC) {
		if (!check_tsc_unstable()) {
			offset = kvm->arch.last_tsc_offset;
			pr_debug("kvm: matched tsc offset for %llu\n", data);
		} else {
			u64 delta = nsec_to_cycles(vcpu, elapsed);
			offset += delta;
			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
		}
		ns = kvm->arch.last_tsc_nsec;
	}
	kvm->arch.last_tsc_nsec = ns;
	kvm->arch.last_tsc_write = data;
	kvm->arch.last_tsc_offset = offset;
	kvm_x86_ops->write_tsc_offset(vcpu, offset);
	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

	/* Reset of TSC must disable overshoot protection below */
	vcpu->arch.hv_clock.tsc_timestamp = 0;
	vcpu->arch.last_tsc_write = data;
	vcpu->arch.last_tsc_nsec = ns;
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);

static int kvm_guest_time_update(struct kvm_vcpu *v)
{
	unsigned long flags;
	struct kvm_vcpu_arch *vcpu = &v->arch;
	void *shared_kaddr;
	unsigned long this_tsc_khz;
	s64 kernel_ns, max_kernel_ns;
	u64 tsc_timestamp;

	/* Keep irq disabled to prevent changes to the clock */
	local_irq_save(flags);
	kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
	kernel_ns = get_kernel_ns();
	this_tsc_khz = vcpu_tsc_khz(v);
	if (unlikely(this_tsc_khz == 0)) {
		local_irq_restore(flags);
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
		return 1;
	}

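	/*
	 * We may have to catch up the TSC to match elapsed wall clock
	 * time for two reasons, even if kvmclock is used.
	 *   1) CPU could have been running below the maximum TSC rate
	 *   2) Broken TSC compensation resets the base at each VCPU
	 *      entry to avoid unknown leaps of TSC even when running
	 *      on the same CPU.
	 */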
	if (vcpu->tsc_catchup) {
		u64 tsc = compute_guest_tsc(v, kernel_ns);
		if (tsc > tsc_timestamp) {
			kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
			tsc_timestamp = tsc;
		}
	}

	local_irq_restore(flags);

	if (!vcpu->time_page)
		return 0;

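	/*
	 * Time as measured by the TSC may go backwards when resetting the base
	 * tsc_timestamp, because the TSC resolution is higher than that of the
	 * other clock scales.  To keep the guest from ever observing time
	 * moving backwards, compute the largest time value the guest could
	 * already have seen (max_kernel_ns) and never publish a smaller
	 * system_time than that.
	 */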
	max_kernel_ns = 0;
	if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
		max_kernel_ns = vcpu->last_guest_tsc -
				vcpu->hv_clock.tsc_timestamp;
		max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
				    vcpu->hv_clock.tsc_to_system_mul,
				    vcpu->hv_clock.tsc_shift);
		max_kernel_ns += vcpu->last_kernel_ns;
	}

	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
				   &vcpu->hv_clock.tsc_shift,
				   &vcpu->hv_clock.tsc_to_system_mul);
		vcpu->hw_tsc_khz = this_tsc_khz;
	}

	if (max_kernel_ns > kernel_ns)
		kernel_ns = max_kernel_ns;

	/* With all the info we got, fill in the values */
	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
	vcpu->last_kernel_ns = kernel_ns;
	vcpu->last_guest_tsc = tsc_timestamp;
	vcpu->hv_clock.flags = 0;

	/*
	 * The interface expects us to write an even number signaling that the
	 * update is finished. Since the guest won't see the intermediate
	 * states, we just write the version twice.
	 */
	vcpu->hv_clock.version += 2;

	shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);

	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
	       sizeof(vcpu->hv_clock));

	kunmap_atomic(shared_kaddr, KM_USER0);

	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
	return 0;
}

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	case 0x2f8:
		return true;
	}
	return false;
}

static bool valid_pat_type(unsigned t)
{
	return t < 8 && (1 << t) & 0xf3;	/* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73;	/* 0, 1, 4, 5, 6 */
}

static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (i = 0; i < 8; i++)
			if (!valid_pat_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8 ; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	return valid_mtrr_type(data & 0xff);
}

static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

	if (!mtrr_valid(vcpu, msr, data))
		return 1;

	if (msr == MSR_MTRRdefType) {
		vcpu->arch.mtrr_state.def_type = data;
		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
	} else if (msr == MSR_MTRRfix64K_00000)
		p[0] = data;
	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
		p[1 + msr - MSR_MTRRfix16K_80000] = data;
	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else {	/* Variable MTRRs */
		int idx, is_mtrr_mask;
		u64 *pt;

		idx = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * idx;
		if (!is_mtrr_mask)
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
		else
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
		*pt = data;
	}

	kvm_mmu_reset_context(vcpu);
	return 0;
}

static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;

	switch (msr) {
	case MSR_IA32_MCG_STATUS:
		vcpu->arch.mcg_status = data;
		break;
	case MSR_IA32_MCG_CTL:
		if (!(mcg_cap & MCG_CTL_P))
			return 1;
		if (data != 0 && data != ~(u64)0)
			return -1;
		vcpu->arch.mcg_ctl = data;
		break;
	default:
		if (msr >= MSR_IA32_MC0_CTL &&
		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			/* only 0 or all 1s can be written to IA32_MCi_CTL
			 * some Linux kernels though clear bit 10 in bank 4 to
			 * workaround a BIOS/GART TBL issue on AMD K8s, allow
			 * this to avoid an uncaught #GP in the guest
			 */
			if ((offset & 0x3) == 0 &&
			    data != 0 && (data | (1 << 10)) != ~(u64)0)
				return -1;
			vcpu->arch.mce_banks[offset] = data;
			break;
		}
		return 1;
	}
	return 0;
}

static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	int lm = is_long_mode(vcpu);
	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
		: kvm->arch.xen_hvm_config.blob_size_32;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	u8 *page;
	int r;

	r = -E2BIG;
	if (page_num >= blob_size)
		goto out;
	r = -ENOMEM;
	page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!page)
		goto out;
	r = -EFAULT;
	if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
		goto out_free;
	if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
		goto out_free;
	r = 0;
out_free:
	kfree(page);
out:
	return r;
}

static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;
	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
		r = true;
		break;
	}

	return r;
}

static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm *kvm = vcpu->kvm;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		kvm->arch.hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!kvm->arch.hv_guest_os_id)
			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!kvm->arch.hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			kvm->arch.hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		kvm->arch.hv_hypercall = data;
		break;
	}
	default:
		pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			  "data 0x%llx\n", msr, data);
		return 1;
	}
	return 0;
}

static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case HV_X64_MSR_APIC_ASSIST_PAGE: {
		unsigned long addr;

		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
			vcpu->arch.hv_vapic = data;
			break;
		}
		addr = gfn_to_hva(vcpu->kvm, data >>
				  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
		if (kvm_is_error_hva(addr))
			return 1;
		if (clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		vcpu->arch.hv_vapic = data;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	default:
		pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			  "data 0x%llx\n", msr, data);
		return 1;
	}

	return 0;
}

static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
	gpa_t gpa = data & ~0x3f;

	/* Bits 2:5 are reserved, should be zero */
	if (data & 0x3c)
		return 1;

	vcpu->arch.apf.msr_val = data;

	if (!(data & KVM_ASYNC_PF_ENABLED)) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
		return 0;
	}

	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
		return 1;

	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
	kvm_async_pf_wakeup_all(vcpu);
	return 0;
}

static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.time_page) {
		kvm_release_page_dirty(vcpu->arch.time_page);
		vcpu->arch.time_page = NULL;
	}
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case MSR_EFER:
		return set_efer(vcpu, data);
	case MSR_K7_HWCR:
		data &= ~(u64)0x40;	/* ignore flush filter disable */
		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
		if (data != 0) {
			pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
				data);
			return 1;
		}
		break;
	case MSR_FAM10H_MMIO_CONF_BASE:
		if (data != 0) {
			pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
				"0x%llx\n", data);
			return 1;
		}
		break;
	case MSR_AMD64_NB_CFG:
		break;
	case MSR_IA32_DEBUGCTLMSR:
		if (!data) {
			/* We support the non-activated case already */
			break;
		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/* Values other than LBR and BTF are vendor-specific,
			   thus reserved and should throw a #GP */
			return 1;
		}
		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
			__func__, data);
		break;
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case MSR_VM_HSAVE_PA:
	case MSR_AMD64_PATCH_LOADER:
		break;
	case 0x200 ... 0x2ff:
		return set_msr_mtrr(vcpu, msr, data);
	case MSR_IA32_APICBASE:
		kvm_set_apic_base(vcpu, data);
		break;
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_write(vcpu, msr, data);
	case MSR_IA32_MISC_ENABLE:
		vcpu->arch.ia32_misc_enable_msr = data;
		break;
	case MSR_KVM_WALL_CLOCK_NEW:
	case MSR_KVM_WALL_CLOCK:
		vcpu->kvm->arch.wall_clock = data;
		kvm_write_wall_clock(vcpu->kvm, data);
		break;
	case MSR_KVM_SYSTEM_TIME_NEW:
	case MSR_KVM_SYSTEM_TIME: {
		kvmclock_reset(vcpu);

		vcpu->arch.time = data;
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;

		/* ...but clean it before doing the actual write */
		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);

		vcpu->arch.time_page =
				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);

		if (is_error_page(vcpu->arch.time_page)) {
			kvm_release_page_clean(vcpu->arch.time_page);
			vcpu->arch.time_page = NULL;
		}
		break;
	}
	case MSR_KVM_ASYNC_PF_EN:
		if (kvm_pv_enable_async_pf(vcpu, data))
			return 1;
		break;
	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
		return set_msr_mce(vcpu, msr, data);

	/*
	 * Performance counters are not virtualized here; accept the
	 * writes (logging non-zero values as unimplemented) so that
	 * guests which program them keep running.
	 */
	case MSR_P6_EVNTSEL0:
	case MSR_P6_EVNTSEL1:
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		if (data != 0)
			pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
				"0x%x data 0x%llx\n", msr, data);
		break;
	/* at least RHEL 4 unconditionally writes to the perfctr registers,
	 * so we ignore all writes.
	 */
	case MSR_P6_PERFCTR0:
	case MSR_P6_PERFCTR1:
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
			"0x%x data 0x%llx\n", msr, data);
		break;
	case MSR_K7_CLK_CTL:
		/*
		 * Ignore all writes to this no longer documented MSR.
		 * Writes are only relevant for old K7 processors,
		 * all pre-dating SVM, but a recommended workaround from
		 * AMD for these chips. It is possible to specify the
		 * affected processor models on the command line, hence
		 * the need to ignore the workaround.
		 */
		break;
	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
		if (kvm_hv_msr_partition_wide(msr)) {
			int r;
			mutex_lock(&vcpu->kvm->lock);
			r = set_msr_hyperv_pw(vcpu, msr, data);
			mutex_unlock(&vcpu->kvm->lock);
			return r;
		} else
			return set_msr_hyperv(vcpu, msr, data);
		break;
	case MSR_IA32_BBL_CR_CTL3:
		/* Drop writes to this legacy MSR -- see rdmsr
		 * counterpart for further detail.
		 */
		pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
		break;
	default:
		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
			return xen_hvm_config(vcpu, data);
		if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				msr, data);
			return 1;
		} else {
			pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
				msr, data);
			break;
		}
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

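/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */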
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

	if (!msr_mtrr_valid(msr))
		return 1;

	if (msr == MSR_MTRRdefType)
		*pdata = vcpu->arch.mtrr_state.def_type +
			 (vcpu->arch.mtrr_state.enabled << 10);
	else if (msr == MSR_MTRRfix64K_00000)
		*pdata = p[0];
	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
		*pdata = p[1 + msr - MSR_MTRRfix16K_80000];
	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
		*pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
	else if (msr == MSR_IA32_CR_PAT)
		*pdata = vcpu->arch.pat;
	else {	/* Variable MTRRs */
		int idx, is_mtrr_mask;
		u64 *pt;

		idx = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * idx;
		if (!is_mtrr_mask)
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
		else
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
		*pdata = *pt;
	}

	return 0;
}

static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;

	switch (msr) {
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
		data = 0;
		break;
	case MSR_IA32_MCG_CAP:
		data = vcpu->arch.mcg_cap;
		break;
	case MSR_IA32_MCG_CTL:
		if (!(mcg_cap & MCG_CTL_P))
			return 1;
		data = vcpu->arch.mcg_ctl;
		break;
	case MSR_IA32_MCG_STATUS:
		data = vcpu->arch.mcg_status;
		break;
	default:
		if (msr >= MSR_IA32_MC0_CTL &&
		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			data = vcpu->arch.mce_banks[offset];
			break;
		}
		return 1;
	}
	*pdata = data;
	return 0;
}

static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = kvm->arch.hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = kvm->arch.hv_hypercall;
		break;
	default:
		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = data;
	return 0;
}

static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data = 0;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		int r;
		struct kvm_vcpu *v;
		kvm_for_each_vcpu(r, v, vcpu->kvm)
			if (v == vcpu)
				data = r;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	default:
		pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}

int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;

	switch (msr) {
	case MSR_IA32_PLATFORM_ID:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_EBL_CR_POWERON:
	case MSR_IA32_DEBUGCTLMSR:
	case MSR_IA32_LASTBRANCHFROMIP:
	case MSR_IA32_LASTBRANCHTOIP:
	case MSR_IA32_LASTINTFROMIP:
	case MSR_IA32_LASTINTTOIP:
	case MSR_K8_SYSCFG:
	case MSR_K7_HWCR:
	case MSR_VM_HSAVE_PA:
	case MSR_P6_PERFCTR0:
	case MSR_P6_PERFCTR1:
	case MSR_P6_EVNTSEL0:
	case MSR_P6_EVNTSEL1:
	case MSR_K7_EVNTSEL0:
	case MSR_K7_PERFCTR0:
	case MSR_K8_INT_PENDING_MSG:
	case MSR_AMD64_NB_CFG:
	case MSR_FAM10H_MMIO_CONF_BASE:
		data = 0;
		break;
	case MSR_MTRRcap:
		data = 0x500 | KVM_NR_VAR_MTRR;
		break;
	case 0x200 ... 0x2ff:
		return get_msr_mtrr(vcpu, msr, pdata);
	case 0xcd: /* fsb frequency */
		data = 3;
		break;
		/*
		 * MSR_EBC_FREQUENCY_ID: report a conservative bus-frequency
		 * encoding that is valid for even the most basic CPU models,
		 * so guests that read it for calibration stay happy.
		 */
	case MSR_EBC_FREQUENCY_ID:
		data = 1 << 24;
		break;
	case MSR_IA32_APICBASE:
		data = kvm_get_apic_base(vcpu);
		break;
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_read(vcpu, msr, pdata);
	case MSR_IA32_MISC_ENABLE:
		data = vcpu->arch.ia32_misc_enable_msr;
		break;
	case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		data = 1000ULL;
		/* CPU multiplier */
		data |= (((uint64_t)4ULL) << 40);
		break;
	case MSR_EFER:
		data = vcpu->arch.efer;
		break;
	case MSR_KVM_WALL_CLOCK:
	case MSR_KVM_WALL_CLOCK_NEW:
		data = vcpu->kvm->arch.wall_clock;
		break;
	case MSR_KVM_SYSTEM_TIME:
	case MSR_KVM_SYSTEM_TIME_NEW:
		data = vcpu->arch.time;
		break;
	case MSR_KVM_ASYNC_PF_EN:
		data = vcpu->arch.apf.msr_val;
		break;
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MCG_CAP:
	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
		return get_msr_mce(vcpu, msr, pdata);
	case MSR_K7_CLK_CTL:
		/*
		 * Provide expected ramp-up count for K7. All other
		 * are set to zero, indicating minimum divisors for
		 * every field.
		 *
		 * This prevents guest kernels on AMD host with CPU
		 * type 6, model 8 and higher from exploding due to
		 * the rdmsr failing.
		 */
		data = 0x20000000;
		break;
	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
		if (kvm_hv_msr_partition_wide(msr)) {
			int r;
			mutex_lock(&vcpu->kvm->lock);
			r = get_msr_hyperv_pw(vcpu, msr, pdata);
			mutex_unlock(&vcpu->kvm->lock);
			return r;
		} else
			return get_msr_hyperv(vcpu, msr, pdata);
		break;
	case MSR_IA32_BBL_CR_CTL3:
		/* This legacy MSR exists but isn't fully documented in current
		 * silicon.  It is however accessed by winxp in very narrow
		 * scenarios where it sets bit #19, itself documented as
		 * a "reserved" bit.  Best effort attempt to source coherent
		 * read data here should the balance of the register be
		 * interpreted by the guest:
		 *
		 * L2 cache control register 3: 64GB range, 256KB size,
		 * enabled, latency 0x1, configured
		 */
		data = 0xbe702111;
		break;
	default:
		if (!ignore_msrs) {
			pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
			return 1;
		} else {
			pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
			data = 0;
		}
		break;
	}
	*pdata = data;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);

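/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */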
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
{
	int i, idx;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	for (i = 0; i < msrs->nmsrs; ++i)
		if (do_msr(vcpu, entries[i].index, &entries[i].data))
			break;
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	return i;
}

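/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */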
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = kmalloc(size, GFP_KERNEL);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	kfree(entries);
out:
	return r;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_IRQCHIP:
	case KVM_CAP_HLT:
	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
	case KVM_CAP_SET_TSS_ADDR:
	case KVM_CAP_EXT_CPUID:
	case KVM_CAP_CLOCKSOURCE:
	case KVM_CAP_PIT:
	case KVM_CAP_NOP_IO_DELAY:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_USER_NMI:
	case KVM_CAP_REINJECT_CONTROL:
	case KVM_CAP_IRQ_INJECT_STATUS:
	case KVM_CAP_ASSIGN_DEV_IRQ:
	case KVM_CAP_IRQFD:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_PIT2:
	case KVM_CAP_PIT_STATE2:
	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
	case KVM_CAP_XEN_HVM:
	case KVM_CAP_ADJUST_CLOCK:
	case KVM_CAP_VCPU_EVENTS:
	case KVM_CAP_HYPERV:
	case KVM_CAP_HYPERV_VAPIC:
	case KVM_CAP_HYPERV_SPIN:
	case KVM_CAP_PCI_SEGMENT:
	case KVM_CAP_DEBUGREGS:
	case KVM_CAP_X86_ROBUST_SINGLESTEP:
	case KVM_CAP_XSAVE:
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_GET_TSC_KHZ:
		r = 1;
		break;
	case KVM_CAP_COALESCED_MMIO:
		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
		break;
	case KVM_CAP_VAPIC:
		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
		break;
	case KVM_CAP_NR_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_MEMORY_SLOTS;
		break;
	case KVM_CAP_PV_MMU:	/* obsolete */
		r = 0;
		break;
	case KVM_CAP_IOMMU:
		r = iommu_found();
		break;
	case KVM_CAP_MCE:
		r = KVM_MAX_MCE_BANKS;
		break;
	case KVM_CAP_XCRS:
		r = cpu_has_xsave;
		break;
	case KVM_CAP_TSC_CONTROL:
		r = kvm_has_tsc_control;
		break;
	default:
		r = 0;
		break;
	}
	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {
	case KVM_GET_MSR_INDEX_LIST: {
		struct kvm_msr_list __user *user_msr_list = argp;
		struct kvm_msr_list msr_list;
		unsigned n;

		r = -EFAULT;
		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
			goto out;
		n = msr_list.nmsrs;
		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
			goto out;
		r = -E2BIG;
		if (n < msr_list.nmsrs)
			goto out;
		r = -EFAULT;
		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
				 num_msrs_to_save * sizeof(u32)))
			goto out;
		if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
				 &emulated_msrs,
				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
			goto out;
		r = 0;
		break;
	}
	case KVM_GET_SUPPORTED_CPUID: {
		struct kvm_cpuid2 __user *cpuid_arg = argp;
		struct kvm_cpuid2 cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
						      cpuid_arg->entries);
		if (r)
			goto out;

		r = -EFAULT;
		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
			goto out;
		r = 0;
		break;
	}
	case KVM_X86_GET_MCE_CAP_SUPPORTED: {
		u64 mce_cap;

		mce_cap = KVM_MCE_CAP_SUPPORTED;
		r = -EFAULT;
		if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
			goto out;
		r = 0;
		break;
	}
	default:
		r = -EINVAL;
	}
out:
	return r;
}

static void wbinvd_ipi(void *garbage)
{
	wbinvd();
}

static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
	return vcpu->kvm->arch.iommu_domain &&
		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Address WBINVD may be executed by guest */
	if (need_emulate_wbinvd(vcpu)) {
		if (kvm_x86_ops->has_wbinvd_exit())
			cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
		else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
			smp_call_function_single(vcpu->cpu,
					wbinvd_ipi, NULL, 1);
	}

	kvm_x86_ops->vcpu_load(vcpu, cpu);
	if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
		/* Make sure TSC doesn't go backwards */
		s64 tsc_delta;
		u64 tsc;

		kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
		tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
			     tsc - vcpu->arch.last_guest_tsc;

		if (tsc_delta < 0)
			mark_tsc_unstable("KVM discovered backwards TSC");
		if (check_tsc_unstable()) {
			kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
			vcpu->arch.tsc_catchup = 1;
		}
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		if (vcpu->cpu != cpu)
			kvm_migrate_timers(vcpu);
		vcpu->cpu = cpu;
	}
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
	kvm_put_guest_fpu(vcpu);
	kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
}

static int is_efer_nx(void)
{
	unsigned long long efer = 0;

	rdmsrl_safe(MSR_EFER, &efer);
	return efer & EFER_NX;
}

static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_cpuid_entry2 *e, *entry;

	entry = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

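/* when an old userspace process fills a new kernel module */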
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	int r, i;
	struct kvm_cpuid_entry *cpuid_entries;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -ENOMEM;
	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
	if (!cpuid_entries)
		goto out;
	r = -EFAULT;
	if (copy_from_user(cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		goto out_free;
	for (i = 0; i < cpuid->nent; i++) {
		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
		vcpu->arch.cpuid_entries[i].index = 0;
		vcpu->arch.cpuid_entries[i].flags = 0;
		vcpu->arch.cpuid_entries[i].padding[0] = 0;
		vcpu->arch.cpuid_entries[i].padding[1] = 0;
		vcpu->arch.cpuid_entries[i].padding[2] = 0;
	}
	vcpu->arch.cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	r = 0;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	update_cpuid(vcpu);

out_free:
	vfree(cpuid_entries);
out:
	return r;
}

static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
				     struct kvm_cpuid2 *cpuid,
				     struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	vcpu->arch.cpuid_nent = cpuid->nent;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	update_cpuid(vcpu);
	return 0;

out:
	return r;
}

static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
				     struct kvm_cpuid2 *cpuid,
				     struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent < vcpu->arch.cpuid_nent)
		goto out;
	r = -EFAULT;
	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	return 0;

out:
	cpuid->nent = vcpu->arch.cpuid_nent;
	return r;
}

static void cpuid_mask(u32 *word, int wordnum)
{
	*word &= boot_cpu_data.x86_capability[wordnum];
}
2275
2276static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2277 u32 index)
2278{
2279 entry->function = function;
2280 entry->index = index;
2281 cpuid_count(entry->function, entry->index,
2282 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
2283 entry->flags = 0;
2284}
2285
2286#define F(x) bit(X86_FEATURE_##x)
2287
2288static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2289 u32 index, int *nent, int maxnent)
2290{
2291 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
2292#ifdef CONFIG_X86_64
2293 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
2294 ? F(GBPAGES) : 0;
2295 unsigned f_lm = F(LM);
2296#else
2297 unsigned f_gbpages = 0;
2298 unsigned f_lm = 0;
2299#endif
2300 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
2301
2302
2303 const u32 kvm_supported_word0_x86_features =
2304 F(FPU) | F(VME) | F(DE) | F(PSE) |
2305 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
2306 F(CX8) | F(APIC) | 0 | F(SEP) |
2307 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
2308 F(PAT) | F(PSE36) | 0 | F(CLFLSH) |
2309 0 | F(MMX) |
2310 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
2311 0 ;
2312
	/* cpuid 0x80000001.edx */
	const u32 kvm_supported_word1_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
2322
	/* cpuid 1.ecx */
	const u32 kvm_supported_word4_x86_features =
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
		0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved */ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
		F(F16C);
2332
	/* cpuid 0x80000001.ecx */
	const u32 kvm_supported_word6_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /* SVM */ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
2338
	/* cpuid 0xC0000001.edx */
	const u32 kvm_supported_word5_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);
2344
	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();
2347 do_cpuid_1_ent(entry, function, index);
2348 ++*nent;
2349
2350 switch (function) {
2351 case 0:
2352 entry->eax = min(entry->eax, (u32)0xd);
2353 break;
2354 case 1:
2355 entry->edx &= kvm_supported_word0_x86_features;
2356 cpuid_mask(&entry->edx, 0);
2357 entry->ecx &= kvm_supported_word4_x86_features;
2358 cpuid_mask(&entry->ecx, 4);

		/* we support x2apic emulation even if host does not support
		 * it since we emulate x2apic in software */
		entry->ecx |= F(X2APIC);
2362 break;
2363
	/*
	 * Function 2 entries are STATEFUL: repeated CPUID(2) invocations
	 * may return different values, and eax[7:0] of the first leaf
	 * reports how many times the instruction must be executed.
	 */
	case 2: {
2368 int t, times = entry->eax & 0xff;
2369
2370 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
2371 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
2372 for (t = 1; t < times && *nent < maxnent; ++t) {
2373 do_cpuid_1_ent(&entry[t], function, 0);
2374 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
2375 ++*nent;
2376 }
2377 break;
2378 }
2379
	/* functions 4 and 0xb have additional index. */
	case 4: {
2381 int i, cache_type;
2382
2383 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2384
2385 for (i = 1; *nent < maxnent; ++i) {
2386 cache_type = entry[i - 1].eax & 0x1f;
2387 if (!cache_type)
2388 break;
2389 do_cpuid_1_ent(&entry[i], function, i);
2390 entry[i].flags |=
2391 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2392 ++*nent;
2393 }
2394 break;
2395 }
2396 case 0xb: {
2397 int i, level_type;
2398
2399 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2400
2401 for (i = 1; *nent < maxnent; ++i) {
2402 level_type = entry[i - 1].ecx & 0xff00;
2403 if (!level_type)
2404 break;
2405 do_cpuid_1_ent(&entry[i], function, i);
2406 entry[i].flags |=
2407 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2408 ++*nent;
2409 }
2410 break;
2411 }
	case 0xd: {
		int idx, i;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* probe each subleaf first and keep only the ones that exist */
		for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) {
			do_cpuid_1_ent(&entry[i], function, idx);
			if (entry[i].eax == 0)
				continue;
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
			++i;
		}
		break;
	}
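	/*
	 * The hypervisor signature leaf: guests probe this to detect that
	 * they run on KVM; the 12-byte signature is returned in ebx/ecx/edx.
	 */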
2426 case KVM_CPUID_SIGNATURE: {
2427 char signature[12] = "KVMKVMKVM\0\0";
2428 u32 *sigptr = (u32 *)signature;
2429 entry->eax = 0;
2430 entry->ebx = sigptr[0];
2431 entry->ecx = sigptr[1];
2432 entry->edx = sigptr[2];
2433 break;
2434 }
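	/*
	 * The paravirtual feature leaf: each eax bit advertises a KVM
	 * paravirt capability (kvmclock, NOP IO delay, async page faults)
	 * to the guest.
	 */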
2435 case KVM_CPUID_FEATURES:
2436 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
2437 (1 << KVM_FEATURE_NOP_IO_DELAY) |
2438 (1 << KVM_FEATURE_CLOCKSOURCE2) |
2439 (1 << KVM_FEATURE_ASYNC_PF) |
2440 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
2441 entry->ebx = 0;
2442 entry->ecx = 0;
2443 entry->edx = 0;
2444 break;
2445 case 0x80000000:
2446 entry->eax = min(entry->eax, 0x8000001a);
2447 break;
2448 case 0x80000001:
2449 entry->edx &= kvm_supported_word1_x86_features;
2450 cpuid_mask(&entry->edx, 1);
2451 entry->ecx &= kvm_supported_word6_x86_features;
2452 cpuid_mask(&entry->ecx, 6);
2453 break;

	/* Add support for Centaur's CPUID instruction. */
	case 0xC0000000:
		/* Just support up to 0xC0000004 now. */
		entry->eax = min(entry->eax, 0xC0000004);
2458 break;
2459 case 0xC0000001:
2460 entry->edx &= kvm_supported_word5_x86_features;
2461 cpuid_mask(&entry->edx, 5);
2462 break;
2463 case 0xC0000002:
2464 case 0xC0000003:
2465 case 0xC0000004:
		/* Nothing to do here for now; reserved for the future. */
2467 break;
2468 }
2469
2470 kvm_x86_ops->set_supported_cpuid(function, entry);
2471
2472 put_cpu();
2473}
2474
2475#undef F
2476
2477static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
2478 struct kvm_cpuid_entry2 __user *entries)
2479{
2480 struct kvm_cpuid_entry2 *cpuid_entries;
2481 int limit, nent = 0, r = -E2BIG;
2482 u32 func;
2483
2484 if (cpuid->nent < 1)
2485 goto out;
2486 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2487 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
2488 r = -ENOMEM;
2489 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
2490 if (!cpuid_entries)
2491 goto out;
2492
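	/*
	 * Fill the standard leaves first; leaf 0 reports the highest
	 * supported standard function in eax, which bounds the loop.
	 */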
2493 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
2494 limit = cpuid_entries[0].eax;
2495 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
2496 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2497 &nent, cpuid->nent);
2498 r = -E2BIG;
2499 if (nent >= cpuid->nent)
2500 goto out_free;
2501
2502 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
2503 limit = cpuid_entries[nent - 1].eax;
2504 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
2505 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2506 &nent, cpuid->nent);
2507
2510 r = -E2BIG;
2511 if (nent >= cpuid->nent)
2512 goto out_free;
2513
	/* Add support for Centaur's CPUID instruction. */
2515 if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) {
2516 do_cpuid_ent(&cpuid_entries[nent], 0xC0000000, 0,
2517 &nent, cpuid->nent);
2518
2519 r = -E2BIG;
2520 if (nent >= cpuid->nent)
2521 goto out_free;
2522
2523 limit = cpuid_entries[nent - 1].eax;
2524 for (func = 0xC0000001;
2525 func <= limit && nent < cpuid->nent; ++func)
2526 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2527 &nent, cpuid->nent);
2528
2529 r = -E2BIG;
2530 if (nent >= cpuid->nent)
2531 goto out_free;
2532 }
2533
2534 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2535 cpuid->nent);
2536
2537 r = -E2BIG;
2538 if (nent >= cpuid->nent)
2539 goto out_free;
2540
2541 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
2542 cpuid->nent);
2543
2544 r = -E2BIG;
2545 if (nent >= cpuid->nent)
2546 goto out_free;
2547
2548 r = -EFAULT;
2549 if (copy_to_user(entries, cpuid_entries,
2550 nent * sizeof(struct kvm_cpuid_entry2)))
2551 goto out_free;
2552 cpuid->nent = nent;
2553 r = 0;
2554
2555out_free:
2556 vfree(cpuid_entries);
2557out:
2558 return r;
2559}
2560
2561static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2562 struct kvm_lapic_state *s)
2563{
2564 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2565
2566 return 0;
2567}
2568
2569static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2570 struct kvm_lapic_state *s)
2571{
2572 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2573 kvm_apic_post_state_restore(vcpu);
2574 update_cr8_intercept(vcpu);
2575
2576 return 0;
2577}
2578
2579static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2580 struct kvm_interrupt *irq)
2581{
2582 if (irq->irq < 0 || irq->irq >= 256)
2583 return -EINVAL;
2584 if (irqchip_in_kernel(vcpu->kvm))
2585 return -ENXIO;
2586
2587 kvm_queue_interrupt(vcpu, irq->irq, false);
2588 kvm_make_request(KVM_REQ_EVENT, vcpu);
2589
2590 return 0;
2591}
2592
2593static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2594{
2595 kvm_inject_nmi(vcpu);
2596
2597 return 0;
2598}
2599
2600static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2601 struct kvm_tpr_access_ctl *tac)
2602{
2603 if (tac->flags)
2604 return -EINVAL;
2605 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2606 return 0;
2607}
2608
2609static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2610 u64 mcg_cap)
2611{
2612 int r;
2613 unsigned bank_num = mcg_cap & 0xff, bank;
2614
2615 r = -EINVAL;
2616 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2617 goto out;
2618 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2619 goto out;
2620 r = 0;
2621 vcpu->arch.mcg_cap = mcg_cap;
	/* Init IA32_MCG_CTL to all 1s */
2623 if (mcg_cap & MCG_CTL_P)
2624 vcpu->arch.mcg_ctl = ~(u64)0;
	/* Init IA32_MCi_CTL to all 1s */
2626 for (bank = 0; bank < bank_num; bank++)
2627 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2628out:
2629 return r;
2630}
2631
2632static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2633 struct kvm_x86_mce *mce)
2634{
2635 u64 mcg_cap = vcpu->arch.mcg_cap;
2636 unsigned bank_num = mcg_cap & 0xff;
2637 u64 *banks = vcpu->arch.mce_banks;
2638
2639 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2640 return -EINVAL;
2641
	/*
	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
	 * reporting is disabled
	 */
2645 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2646 vcpu->arch.mcg_ctl != ~(u64)0)
2647 return 0;
2648 banks += 4 * mce->bank;
2649
	/*
	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
	 * reporting is disabled for the bank
	 */
2653 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2654 return 0;
2655 if (mce->status & MCI_STATUS_UC) {
2656 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2657 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2658 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2659 return 0;
2660 }
2661 if (banks[1] & MCI_STATUS_VAL)
2662 mce->status |= MCI_STATUS_OVER;
2663 banks[2] = mce->addr;
2664 banks[3] = mce->misc;
2665 vcpu->arch.mcg_status = mce->mcg_status;
2666 banks[1] = mce->status;
2667 kvm_queue_exception(vcpu, MC_VECTOR);
2668 } else if (!(banks[1] & MCI_STATUS_VAL)
2669 || !(banks[1] & MCI_STATUS_UC)) {
2670 if (banks[1] & MCI_STATUS_VAL)
2671 mce->status |= MCI_STATUS_OVER;
2672 banks[2] = mce->addr;
2673 banks[3] = mce->misc;
2674 banks[1] = mce->status;
2675 } else
2676 banks[1] |= MCI_STATUS_OVER;
2677 return 0;
2678}
2679
2680static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2681 struct kvm_vcpu_events *events)
2682{
2683 events->exception.injected =
2684 vcpu->arch.exception.pending &&
2685 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2686 events->exception.nr = vcpu->arch.exception.nr;
2687 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2688 events->exception.pad = 0;
2689 events->exception.error_code = vcpu->arch.exception.error_code;
2690
2691 events->interrupt.injected =
2692 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2693 events->interrupt.nr = vcpu->arch.interrupt.nr;
2694 events->interrupt.soft = 0;
2695 events->interrupt.shadow =
2696 kvm_x86_ops->get_interrupt_shadow(vcpu,
2697 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2698
2699 events->nmi.injected = vcpu->arch.nmi_injected;
2700 events->nmi.pending = vcpu->arch.nmi_pending;
2701 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2702 events->nmi.pad = 0;
2703
2704 events->sipi_vector = vcpu->arch.sipi_vector;
2705
2706 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2707 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2708 | KVM_VCPUEVENT_VALID_SHADOW);
2709 memset(&events->reserved, 0, sizeof(events->reserved));
2710}
2711
2712static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2713 struct kvm_vcpu_events *events)
2714{
2715 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2716 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2717 | KVM_VCPUEVENT_VALID_SHADOW))
2718 return -EINVAL;
2719
2720 vcpu->arch.exception.pending = events->exception.injected;
2721 vcpu->arch.exception.nr = events->exception.nr;
2722 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2723 vcpu->arch.exception.error_code = events->exception.error_code;
2724
2725 vcpu->arch.interrupt.pending = events->interrupt.injected;
2726 vcpu->arch.interrupt.nr = events->interrupt.nr;
2727 vcpu->arch.interrupt.soft = events->interrupt.soft;
2728 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2729 kvm_x86_ops->set_interrupt_shadow(vcpu,
2730 events->interrupt.shadow);
2731
2732 vcpu->arch.nmi_injected = events->nmi.injected;
2733 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2734 vcpu->arch.nmi_pending = events->nmi.pending;
2735 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2736
2737 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2738 vcpu->arch.sipi_vector = events->sipi_vector;
2739
2740 kvm_make_request(KVM_REQ_EVENT, vcpu);
2741
2742 return 0;
2743}
2744
2745static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2746 struct kvm_debugregs *dbgregs)
2747{
2748 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2749 dbgregs->dr6 = vcpu->arch.dr6;
2750 dbgregs->dr7 = vcpu->arch.dr7;
2751 dbgregs->flags = 0;
2752 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2753}
2754
2755static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2756 struct kvm_debugregs *dbgregs)
2757{
2758 if (dbgregs->flags)
2759 return -EINVAL;
2760
2761 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2762 vcpu->arch.dr6 = dbgregs->dr6;
2763 vcpu->arch.dr7 = dbgregs->dr7;
2764
2765 return 0;
2766}
2767
2768static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2769 struct kvm_xsave *guest_xsave)
2770{
2771 if (cpu_has_xsave)
2772 memcpy(guest_xsave->region,
2773 &vcpu->arch.guest_fpu.state->xsave,
2774 xstate_size);
2775 else {
2776 memcpy(guest_xsave->region,
2777 &vcpu->arch.guest_fpu.state->fxsave,
2778 sizeof(struct i387_fxsave_struct));
2779 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2780 XSTATE_FPSSE;
2781 }
2782}
2783
2784static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2785 struct kvm_xsave *guest_xsave)
2786{
2787 u64 xstate_bv =
2788 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2789
2790 if (cpu_has_xsave)
2791 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2792 guest_xsave->region, xstate_size);
2793 else {
2794 if (xstate_bv & ~XSTATE_FPSSE)
2795 return -EINVAL;
2796 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
2797 guest_xsave->region, sizeof(struct i387_fxsave_struct));
2798 }
2799 return 0;
2800}
2801
2802static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
2803 struct kvm_xcrs *guest_xcrs)
2804{
2805 if (!cpu_has_xsave) {
2806 guest_xcrs->nr_xcrs = 0;
2807 return;
2808 }
2809
2810 guest_xcrs->nr_xcrs = 1;
2811 guest_xcrs->flags = 0;
2812 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
2813 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
2814}
2815
2816static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2817 struct kvm_xcrs *guest_xcrs)
2818{
2819 int i, r = 0;
2820
2821 if (!cpu_has_xsave)
2822 return -EINVAL;
2823
2824 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
2825 return -EINVAL;
2826
	for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only support XCR0 currently */
		if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
					  guest_xcrs->xcrs[i].value);
			break;
		}
2834 if (r)
2835 r = -EINVAL;
2836 return r;
2837}
2838
2839long kvm_arch_vcpu_ioctl(struct file *filp,
2840 unsigned int ioctl, unsigned long arg)
2841{
2842 struct kvm_vcpu *vcpu = filp->private_data;
2843 void __user *argp = (void __user *)arg;
2844 int r;
2845 union {
2846 struct kvm_lapic_state *lapic;
2847 struct kvm_xsave *xsave;
2848 struct kvm_xcrs *xcrs;
2849 void *buffer;
2850 } u;
2851
2852 u.buffer = NULL;
2853 switch (ioctl) {
2854 case KVM_GET_LAPIC: {
2855 r = -EINVAL;
2856 if (!vcpu->arch.apic)
2857 goto out;
2858 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2859
2860 r = -ENOMEM;
2861 if (!u.lapic)
2862 goto out;
2863 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
2864 if (r)
2865 goto out;
2866 r = -EFAULT;
2867 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
2868 goto out;
2869 r = 0;
2870 break;
2871 }
2872 case KVM_SET_LAPIC: {
2873 r = -EINVAL;
2874 if (!vcpu->arch.apic)
2875 goto out;
2876 u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2877 r = -ENOMEM;
2878 if (!u.lapic)
2879 goto out;
2880 r = -EFAULT;
2881 if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
2882 goto out;
2883 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
2884 if (r)
2885 goto out;
2886 r = 0;
2887 break;
2888 }
2889 case KVM_INTERRUPT: {
2890 struct kvm_interrupt irq;
2891
2892 r = -EFAULT;
2893 if (copy_from_user(&irq, argp, sizeof irq))
2894 goto out;
2895 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2896 if (r)
2897 goto out;
2898 r = 0;
2899 break;
2900 }
2901 case KVM_NMI: {
2902 r = kvm_vcpu_ioctl_nmi(vcpu);
2903 if (r)
2904 goto out;
2905 r = 0;
2906 break;
2907 }
2908 case KVM_SET_CPUID: {
2909 struct kvm_cpuid __user *cpuid_arg = argp;
2910 struct kvm_cpuid cpuid;
2911
2912 r = -EFAULT;
2913 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2914 goto out;
2915 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2916 if (r)
2917 goto out;
2918 break;
2919 }
2920 case KVM_SET_CPUID2: {
2921 struct kvm_cpuid2 __user *cpuid_arg = argp;
2922 struct kvm_cpuid2 cpuid;
2923
2924 r = -EFAULT;
2925 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2926 goto out;
2927 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
2928 cpuid_arg->entries);
2929 if (r)
2930 goto out;
2931 break;
2932 }
2933 case KVM_GET_CPUID2: {
2934 struct kvm_cpuid2 __user *cpuid_arg = argp;
2935 struct kvm_cpuid2 cpuid;
2936
2937 r = -EFAULT;
2938 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2939 goto out;
2940 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
2941 cpuid_arg->entries);
2942 if (r)
2943 goto out;
2944 r = -EFAULT;
2945 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2946 goto out;
2947 r = 0;
2948 break;
2949 }
2950 case KVM_GET_MSRS:
2951 r = msr_io(vcpu, argp, kvm_get_msr, 1);
2952 break;
2953 case KVM_SET_MSRS:
2954 r = msr_io(vcpu, argp, do_set_msr, 0);
2955 break;
2956 case KVM_TPR_ACCESS_REPORTING: {
2957 struct kvm_tpr_access_ctl tac;
2958
2959 r = -EFAULT;
2960 if (copy_from_user(&tac, argp, sizeof tac))
2961 goto out;
2962 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
2963 if (r)
2964 goto out;
2965 r = -EFAULT;
2966 if (copy_to_user(argp, &tac, sizeof tac))
2967 goto out;
2968 r = 0;
2969 break;
	}
2971 case KVM_SET_VAPIC_ADDR: {
2972 struct kvm_vapic_addr va;
2973
2974 r = -EINVAL;
2975 if (!irqchip_in_kernel(vcpu->kvm))
2976 goto out;
2977 r = -EFAULT;
2978 if (copy_from_user(&va, argp, sizeof va))
2979 goto out;
2980 r = 0;
2981 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
2982 break;
2983 }
2984 case KVM_X86_SETUP_MCE: {
2985 u64 mcg_cap;
2986
2987 r = -EFAULT;
2988 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
2989 goto out;
2990 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
2991 break;
2992 }
2993 case KVM_X86_SET_MCE: {
2994 struct kvm_x86_mce mce;
2995
2996 r = -EFAULT;
2997 if (copy_from_user(&mce, argp, sizeof mce))
2998 goto out;
2999 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3000 break;
3001 }
3002 case KVM_GET_VCPU_EVENTS: {
3003 struct kvm_vcpu_events events;
3004
3005 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3006
3007 r = -EFAULT;
3008 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3009 break;
3010 r = 0;
3011 break;
3012 }
3013 case KVM_SET_VCPU_EVENTS: {
3014 struct kvm_vcpu_events events;
3015
3016 r = -EFAULT;
3017 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3018 break;
3019
3020 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3021 break;
3022 }
3023 case KVM_GET_DEBUGREGS: {
3024 struct kvm_debugregs dbgregs;
3025
3026 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3027
3028 r = -EFAULT;
3029 if (copy_to_user(argp, &dbgregs,
3030 sizeof(struct kvm_debugregs)))
3031 break;
3032 r = 0;
3033 break;
3034 }
3035 case KVM_SET_DEBUGREGS: {
3036 struct kvm_debugregs dbgregs;
3037
3038 r = -EFAULT;
3039 if (copy_from_user(&dbgregs, argp,
3040 sizeof(struct kvm_debugregs)))
3041 break;
3042
3043 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3044 break;
3045 }
3046 case KVM_GET_XSAVE: {
3047 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3048 r = -ENOMEM;
3049 if (!u.xsave)
3050 break;
3051
3052 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3053
3054 r = -EFAULT;
3055 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3056 break;
3057 r = 0;
3058 break;
3059 }
3060 case KVM_SET_XSAVE: {
3061 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3062 r = -ENOMEM;
3063 if (!u.xsave)
3064 break;
3065
3066 r = -EFAULT;
3067 if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
3068 break;
3069
3070 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3071 break;
3072 }
3073 case KVM_GET_XCRS: {
3074 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3075 r = -ENOMEM;
3076 if (!u.xcrs)
3077 break;
3078
3079 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3080
3081 r = -EFAULT;
3082 if (copy_to_user(argp, u.xcrs,
3083 sizeof(struct kvm_xcrs)))
3084 break;
3085 r = 0;
3086 break;
3087 }
3088 case KVM_SET_XCRS: {
3089 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3090 r = -ENOMEM;
3091 if (!u.xcrs)
3092 break;
3093
3094 r = -EFAULT;
3095 if (copy_from_user(u.xcrs, argp,
3096 sizeof(struct kvm_xcrs)))
3097 break;
3098
3099 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3100 break;
3101 }
3102 case KVM_SET_TSC_KHZ: {
3103 u32 user_tsc_khz;
3104
3105 r = -EINVAL;
3106 if (!kvm_has_tsc_control)
3107 break;
3108
3109 user_tsc_khz = (u32)arg;
3110
3111 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3112 goto out;
3113
3114 kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
3115
3116 r = 0;
3117 goto out;
3118 }
3119 case KVM_GET_TSC_KHZ: {
3120 r = -EIO;
3121 if (check_tsc_unstable())
3122 goto out;
3123
3124 r = vcpu_tsc_khz(vcpu);
3125
3126 goto out;
3127 }
3128 default:
3129 r = -EINVAL;
3130 }
3131out:
3132 kfree(u.buffer);
3133 return r;
3134}
3135
3136static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3137{
3138 int ret;
3139
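	/*
	 * The TSS used for real-mode emulation occupies three pages; reject
	 * base addresses so high that those pages would wrap the 32-bit
	 * address space.
	 */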
3140 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3141 return -1;
3142 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3143 return ret;
3144}
3145
3146static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3147 u64 ident_addr)
3148{
3149 kvm->arch.ept_identity_map_addr = ident_addr;
3150 return 0;
3151}
3152
3153static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3154 u32 kvm_nr_mmu_pages)
3155{
3156 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3157 return -EINVAL;
3158
3159 mutex_lock(&kvm->slots_lock);
3160 spin_lock(&kvm->mmu_lock);
3161
3162 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3163 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3164
3165 spin_unlock(&kvm->mmu_lock);
3166 mutex_unlock(&kvm->slots_lock);
3167 return 0;
3168}
3169
3170static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3171{
3172 return kvm->arch.n_max_mmu_pages;
3173}
3174
3175static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3176{
3177 int r;
3178
3179 r = 0;
3180 switch (chip->chip_id) {
3181 case KVM_IRQCHIP_PIC_MASTER:
3182 memcpy(&chip->chip.pic,
3183 &pic_irqchip(kvm)->pics[0],
3184 sizeof(struct kvm_pic_state));
3185 break;
3186 case KVM_IRQCHIP_PIC_SLAVE:
3187 memcpy(&chip->chip.pic,
3188 &pic_irqchip(kvm)->pics[1],
3189 sizeof(struct kvm_pic_state));
3190 break;
3191 case KVM_IRQCHIP_IOAPIC:
3192 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3193 break;
3194 default:
3195 r = -EINVAL;
3196 break;
3197 }
3198 return r;
3199}
3200
3201static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3202{
3203 int r;
3204
3205 r = 0;
3206 switch (chip->chip_id) {
3207 case KVM_IRQCHIP_PIC_MASTER:
3208 spin_lock(&pic_irqchip(kvm)->lock);
3209 memcpy(&pic_irqchip(kvm)->pics[0],
3210 &chip->chip.pic,
3211 sizeof(struct kvm_pic_state));
3212 spin_unlock(&pic_irqchip(kvm)->lock);
3213 break;
3214 case KVM_IRQCHIP_PIC_SLAVE:
3215 spin_lock(&pic_irqchip(kvm)->lock);
3216 memcpy(&pic_irqchip(kvm)->pics[1],
3217 &chip->chip.pic,
3218 sizeof(struct kvm_pic_state));
3219 spin_unlock(&pic_irqchip(kvm)->lock);
3220 break;
3221 case KVM_IRQCHIP_IOAPIC:
3222 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3223 break;
3224 default:
3225 r = -EINVAL;
3226 break;
3227 }
3228 kvm_pic_update_irq(pic_irqchip(kvm));
3229 return r;
3230}
3231
3232static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3233{
3234 int r = 0;
3235
3236 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3237 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3238 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3239 return r;
3240}
3241
3242static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3243{
3244 int r = 0;
3245
3246 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3247 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3248 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3249 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3250 return r;
3251}
3252
3253static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3254{
3255 int r = 0;
3256
3257 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3258 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3259 sizeof(ps->channels));
3260 ps->flags = kvm->arch.vpit->pit_state.flags;
3261 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3262 memset(&ps->reserved, 0, sizeof(ps->reserved));
3263 return r;
3264}
3265
3266static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3267{
3268 int r = 0, start = 0;
3269 u32 prev_legacy, cur_legacy;
3270 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3271 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3272 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3273 if (!prev_legacy && cur_legacy)
3274 start = 1;
3275 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3276 sizeof(kvm->arch.vpit->pit_state.channels));
3277 kvm->arch.vpit->pit_state.flags = ps->flags;
3278 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3279 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3280 return r;
3281}
3282
3283static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3284 struct kvm_reinject_control *control)
3285{
3286 if (!kvm->arch.vpit)
3287 return -ENXIO;
3288 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3289 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
3290 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3291 return 0;
3292}
3293
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
3297int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
3298 struct kvm_dirty_log *log)
3299{
3300 int r, i;
3301 struct kvm_memory_slot *memslot;
3302 unsigned long n;
3303 unsigned long is_dirty = 0;
3304
3305 mutex_lock(&kvm->slots_lock);
3306
3307 r = -EINVAL;
3308 if (log->slot >= KVM_MEMORY_SLOTS)
3309 goto out;
3310
3311 memslot = &kvm->memslots->memslots[log->slot];
3312 r = -ENOENT;
3313 if (!memslot->dirty_bitmap)
3314 goto out;
3315
3316 n = kvm_dirty_bitmap_bytes(memslot);
3317
3318 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
3319 is_dirty = memslot->dirty_bitmap[i];
3320
	/* If nothing is dirty, don't bother messing with page tables. */
3322 if (is_dirty) {
3323 struct kvm_memslots *slots, *old_slots;
3324 unsigned long *dirty_bitmap;
3325
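		/*
		 * The bitmap is double buffered: dirty_bitmap_head holds two
		 * copies back to back.  Pick the copy that is not currently
		 * live, zero it, and switch the slot over to it under SRCU,
		 * so the old copy can be handed to userspace without racing
		 * against concurrent dirty logging.
		 */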
3326 dirty_bitmap = memslot->dirty_bitmap_head;
3327 if (memslot->dirty_bitmap == dirty_bitmap)
3328 dirty_bitmap += n / sizeof(long);
3329 memset(dirty_bitmap, 0, n);
3330
3331 r = -ENOMEM;
3332 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
3333 if (!slots)
3334 goto out;
3335 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
3336 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
3337 slots->generation++;
3338
3339 old_slots = kvm->memslots;
3340 rcu_assign_pointer(kvm->memslots, slots);
3341 synchronize_srcu_expedited(&kvm->srcu);
3342 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
3343 kfree(old_slots);
3344
3345 spin_lock(&kvm->mmu_lock);
3346 kvm_mmu_slot_remove_write_access(kvm, log->slot);
3347 spin_unlock(&kvm->mmu_lock);
3348
3349 r = -EFAULT;
3350 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
3351 goto out;
3352 } else {
3353 r = -EFAULT;
3354 if (clear_user(log->dirty_bitmap, n))
3355 goto out;
3356 }
3357
3358 r = 0;
3359out:
3360 mutex_unlock(&kvm->slots_lock);
3361 return r;
3362}
3363
3364long kvm_arch_vm_ioctl(struct file *filp,
3365 unsigned int ioctl, unsigned long arg)
3366{
3367 struct kvm *kvm = filp->private_data;
3368 void __user *argp = (void __user *)arg;
3369 int r = -ENOTTY;
3370
	/*
	 * These payloads are used one at a time, so overlay them in a
	 * union to keep the stack frame of this ioctl handler small.
	 */
3375 union {
3376 struct kvm_pit_state ps;
3377 struct kvm_pit_state2 ps2;
3378 struct kvm_pit_config pit_config;
3379 } u;
3380
3381 switch (ioctl) {
3382 case KVM_SET_TSS_ADDR:
3383 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3384 if (r < 0)
3385 goto out;
3386 break;
3387 case KVM_SET_IDENTITY_MAP_ADDR: {
3388 u64 ident_addr;
3389
3390 r = -EFAULT;
3391 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3392 goto out;
3393 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3394 if (r < 0)
3395 goto out;
3396 break;
3397 }
3398 case KVM_SET_NR_MMU_PAGES:
3399 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3400 if (r)
3401 goto out;
3402 break;
3403 case KVM_GET_NR_MMU_PAGES:
3404 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3405 break;
3406 case KVM_CREATE_IRQCHIP: {
3407 struct kvm_pic *vpic;
3408
3409 mutex_lock(&kvm->lock);
3410 r = -EEXIST;
3411 if (kvm->arch.vpic)
3412 goto create_irqchip_unlock;
3413 r = -ENOMEM;
3414 vpic = kvm_create_pic(kvm);
3415 if (vpic) {
3416 r = kvm_ioapic_init(kvm);
3417 if (r) {
3418 mutex_lock(&kvm->slots_lock);
3419 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3420 &vpic->dev);
3421 mutex_unlock(&kvm->slots_lock);
3422 kfree(vpic);
3423 goto create_irqchip_unlock;
3424 }
3425 } else
3426 goto create_irqchip_unlock;
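		/*
		 * Publish the irqchip only once it is fully initialised:
		 * irqchip_in_kernel() tests kvm->arch.vpic without taking
		 * kvm->lock, so order the initialisation stores against the
		 * pointer publication with write barriers.
		 */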
3427 smp_wmb();
3428 kvm->arch.vpic = vpic;
3429 smp_wmb();
3430 r = kvm_setup_default_irq_routing(kvm);
3431 if (r) {
3432 mutex_lock(&kvm->slots_lock);
3433 mutex_lock(&kvm->irq_lock);
3434 kvm_ioapic_destroy(kvm);
3435 kvm_destroy_pic(kvm);
3436 mutex_unlock(&kvm->irq_lock);
3437 mutex_unlock(&kvm->slots_lock);
3438 }
3439 create_irqchip_unlock:
3440 mutex_unlock(&kvm->lock);
3441 break;
3442 }
3443 case KVM_CREATE_PIT:
3444 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3445 goto create_pit;
3446 case KVM_CREATE_PIT2:
3447 r = -EFAULT;
3448 if (copy_from_user(&u.pit_config, argp,
3449 sizeof(struct kvm_pit_config)))
3450 goto out;
3451 create_pit:
3452 mutex_lock(&kvm->slots_lock);
3453 r = -EEXIST;
3454 if (kvm->arch.vpit)
3455 goto create_pit_unlock;
3456 r = -ENOMEM;
3457 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3458 if (kvm->arch.vpit)
3459 r = 0;
3460 create_pit_unlock:
3461 mutex_unlock(&kvm->slots_lock);
3462 break;
3463 case KVM_IRQ_LINE_STATUS:
3464 case KVM_IRQ_LINE: {
3465 struct kvm_irq_level irq_event;
3466
3467 r = -EFAULT;
3468 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3469 goto out;
3470 r = -ENXIO;
3471 if (irqchip_in_kernel(kvm)) {
3472 __s32 status;
3473 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3474 irq_event.irq, irq_event.level);
3475 if (ioctl == KVM_IRQ_LINE_STATUS) {
3476 r = -EFAULT;
3477 irq_event.status = status;
3478 if (copy_to_user(argp, &irq_event,
3479 sizeof irq_event))
3480 goto out;
3481 }
3482 r = 0;
3483 }
3484 break;
3485 }
3486 case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3488 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3489
3490 r = -ENOMEM;
3491 if (!chip)
3492 goto out;
3493 r = -EFAULT;
3494 if (copy_from_user(chip, argp, sizeof *chip))
3495 goto get_irqchip_out;
3496 r = -ENXIO;
3497 if (!irqchip_in_kernel(kvm))
3498 goto get_irqchip_out;
3499 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3500 if (r)
3501 goto get_irqchip_out;
3502 r = -EFAULT;
3503 if (copy_to_user(argp, chip, sizeof *chip))
3504 goto get_irqchip_out;
3505 r = 0;
3506 get_irqchip_out:
3507 kfree(chip);
3508 if (r)
3509 goto out;
3510 break;
3511 }
3512 case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3514 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3515
3516 r = -ENOMEM;
3517 if (!chip)
3518 goto out;
3519 r = -EFAULT;
3520 if (copy_from_user(chip, argp, sizeof *chip))
3521 goto set_irqchip_out;
3522 r = -ENXIO;
3523 if (!irqchip_in_kernel(kvm))
3524 goto set_irqchip_out;
3525 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3526 if (r)
3527 goto set_irqchip_out;
3528 r = 0;
3529 set_irqchip_out:
3530 kfree(chip);
3531 if (r)
3532 goto out;
3533 break;
3534 }
3535 case KVM_GET_PIT: {
3536 r = -EFAULT;
3537 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3538 goto out;
3539 r = -ENXIO;
3540 if (!kvm->arch.vpit)
3541 goto out;
3542 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3543 if (r)
3544 goto out;
3545 r = -EFAULT;
3546 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3547 goto out;
3548 r = 0;
3549 break;
3550 }
3551 case KVM_SET_PIT: {
3552 r = -EFAULT;
3553 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3554 goto out;
3555 r = -ENXIO;
3556 if (!kvm->arch.vpit)
3557 goto out;
3558 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3559 if (r)
3560 goto out;
3561 r = 0;
3562 break;
3563 }
3564 case KVM_GET_PIT2: {
3565 r = -ENXIO;
3566 if (!kvm->arch.vpit)
3567 goto out;
3568 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3569 if (r)
3570 goto out;
3571 r = -EFAULT;
3572 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3573 goto out;
3574 r = 0;
3575 break;
3576 }
3577 case KVM_SET_PIT2: {
3578 r = -EFAULT;
3579 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3580 goto out;
3581 r = -ENXIO;
3582 if (!kvm->arch.vpit)
3583 goto out;
3584 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3585 if (r)
3586 goto out;
3587 r = 0;
3588 break;
3589 }
3590 case KVM_REINJECT_CONTROL: {
3591 struct kvm_reinject_control control;
3592 r = -EFAULT;
3593 if (copy_from_user(&control, argp, sizeof(control)))
3594 goto out;
3595 r = kvm_vm_ioctl_reinject(kvm, &control);
3596 if (r)
3597 goto out;
3598 r = 0;
3599 break;
3600 }
3601 case KVM_XEN_HVM_CONFIG: {
3602 r = -EFAULT;
3603 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3604 sizeof(struct kvm_xen_hvm_config)))
3605 goto out;
3606 r = -EINVAL;
3607 if (kvm->arch.xen_hvm_config.flags)
3608 goto out;
3609 r = 0;
3610 break;
3611 }
3612 case KVM_SET_CLOCK: {
3613 struct kvm_clock_data user_ns;
3614 u64 now_ns;
3615 s64 delta;
3616
3617 r = -EFAULT;
3618 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3619 goto out;
3620
3621 r = -EINVAL;
3622 if (user_ns.flags)
3623 goto out;
3624
3625 r = 0;
3626 local_irq_disable();
3627 now_ns = get_kernel_ns();
3628 delta = user_ns.clock - now_ns;
3629 local_irq_enable();
3630 kvm->arch.kvmclock_offset = delta;
3631 break;
3632 }
3633 case KVM_GET_CLOCK: {
3634 struct kvm_clock_data user_ns;
3635 u64 now_ns;
3636
3637 local_irq_disable();
3638 now_ns = get_kernel_ns();
3639 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3640 local_irq_enable();
3641 user_ns.flags = 0;
3642 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3643
3644 r = -EFAULT;
3645 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3646 goto out;
3647 r = 0;
3648 break;
3649 }
3650
	default:
		break;
3653 }
3654out:
3655 return r;
3656}
3657
3658static void kvm_init_msr_list(void)
3659{
3660 u32 dummy[2];
3661 unsigned i, j;
3662
	/* skip the first msrs in the list: KVM-specific */
3664 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3665 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3666 continue;
3667 if (j < i)
3668 msrs_to_save[j] = msrs_to_save[i];
3669 j++;
3670 }
3671 num_msrs_to_save = j;
3672}
3673
3674static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3675 const void *v)
3676{
3677 int handled = 0;
3678 int n;
3679
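	/*
	 * Split the access into chunks of at most 8 bytes; try the in-kernel
	 * APIC first, then the MMIO bus, and stop at the first chunk nobody
	 * claims, reporting how many bytes were handled.
	 */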
3680 do {
3681 n = min(len, 8);
3682 if (!(vcpu->arch.apic &&
3683 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3684 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3685 break;
3686 handled += n;
3687 addr += n;
3688 len -= n;
3689 v += n;
3690 } while (len);
3691
3692 return handled;
3693}
3694
3695static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3696{
3697 int handled = 0;
3698 int n;
3699
3700 do {
3701 n = min(len, 8);
3702 if (!(vcpu->arch.apic &&
3703 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3704 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3705 break;
3706 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3707 handled += n;
3708 addr += n;
3709 len -= n;
3710 v += n;
3711 } while (len);
3712
3713 return handled;
3714}
3715
3716static void kvm_set_segment(struct kvm_vcpu *vcpu,
3717 struct kvm_segment *var, int seg)
3718{
3719 kvm_x86_ops->set_segment(vcpu, var, seg);
3720}
3721
3722void kvm_get_segment(struct kvm_vcpu *vcpu,
3723 struct kvm_segment *var, int seg)
3724{
3725 kvm_x86_ops->get_segment(vcpu, var, seg);
3726}
3727
3728static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3729{
3730 return gpa;
3731}
3732
3733static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3734{
3735 gpa_t t_gpa;
3736 struct x86_exception exception;
3737
3738 BUG_ON(!mmu_is_nested(vcpu));
3739
	/* NPT walks are always user-mode accesses */
3741 access |= PFERR_USER_MASK;
3742 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3743
3744 return t_gpa;
3745}
3746
3747gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3748 struct x86_exception *exception)
3749{
3750 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3751 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3752}
3753
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3755 struct x86_exception *exception)
3756{
3757 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3758 access |= PFERR_FETCH_MASK;
3759 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3760}
3761
3762gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3763 struct x86_exception *exception)
3764{
3765 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3766 access |= PFERR_WRITE_MASK;
3767 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3768}
3769
/* uses this to access any guest's mapped memory without checking CPL */
3771gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3772 struct x86_exception *exception)
3773{
3774 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3775}
3776
3777static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3778 struct kvm_vcpu *vcpu, u32 access,
3779 struct x86_exception *exception)
3780{
3781 void *data = val;
3782 int r = X86EMUL_CONTINUE;
3783
3784 while (bytes) {
3785 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3786 exception);
3787 unsigned offset = addr & (PAGE_SIZE-1);
3788 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3789 int ret;
3790
3791 if (gpa == UNMAPPED_GVA)
3792 return X86EMUL_PROPAGATE_FAULT;
3793 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3794 if (ret < 0) {
3795 r = X86EMUL_IO_NEEDED;
3796 goto out;
3797 }
3798
3799 bytes -= toread;
3800 data += toread;
3801 addr += toread;
3802 }
3803out:
3804 return r;
3805}
3806
/* used for instruction fetching */
3808static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3809 gva_t addr, void *val, unsigned int bytes,
3810 struct x86_exception *exception)
3811{
3812 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3813 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3814
3815 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3816 access | PFERR_FETCH_MASK,
3817 exception);
3818}
3819
3820static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
3821 gva_t addr, void *val, unsigned int bytes,
3822 struct x86_exception *exception)
3823{
3824 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3825 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3826
3827 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3828 exception);
3829}
3830
3831static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3832 gva_t addr, void *val, unsigned int bytes,
3833 struct x86_exception *exception)
3834{
3835 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3836 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
3837}
3838
3839static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3840 gva_t addr, void *val,
3841 unsigned int bytes,
3842 struct x86_exception *exception)
3843{
3844 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3845 void *data = val;
3846 int r = X86EMUL_CONTINUE;
3847
3848 while (bytes) {
3849 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
3850 PFERR_WRITE_MASK,
3851 exception);
3852 unsigned offset = addr & (PAGE_SIZE-1);
3853 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3854 int ret;
3855
3856 if (gpa == UNMAPPED_GVA)
3857 return X86EMUL_PROPAGATE_FAULT;
3858 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3859 if (ret < 0) {
3860 r = X86EMUL_IO_NEEDED;
3861 goto out;
3862 }
3863
3864 bytes -= towrite;
3865 data += towrite;
3866 addr += towrite;
3867 }
3868out:
3869 return r;
3870}
3871
3872static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
3873 unsigned long addr,
3874 void *val,
3875 unsigned int bytes,
3876 struct x86_exception *exception)
3877{
3878 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3879 gpa_t gpa;
3880 int handled;
3881
3882 if (vcpu->mmio_read_completed) {
3883 memcpy(val, vcpu->mmio_data, bytes);
3884 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
3885 vcpu->mmio_phys_addr, *(u64 *)val);
3886 vcpu->mmio_read_completed = 0;
3887 return X86EMUL_CONTINUE;
3888 }
3889
3890 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception);
3891
3892 if (gpa == UNMAPPED_GVA)
3893 return X86EMUL_PROPAGATE_FAULT;
3894
	/* For APIC access vmexit */
3896 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3897 goto mmio;
3898
3899 if (kvm_read_guest_virt(ctxt, addr, val, bytes, exception)
3900 == X86EMUL_CONTINUE)
3901 return X86EMUL_CONTINUE;
3902
3903mmio:
	/*
	 * Is this MMIO handled locally?
	 */
3907 handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
3908
3909 if (handled == bytes)
3910 return X86EMUL_CONTINUE;
3911
3912 gpa += handled;
3913 bytes -= handled;
3914 val += handled;
3915
3916 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3917
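	/*
	 * Nobody in the kernel claimed the remainder: hand the access to
	 * userspace through a KVM_EXIT_MMIO run, at most 8 bytes per round.
	 */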
3918 vcpu->mmio_needed = 1;
3919 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3920 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3921 vcpu->mmio_size = bytes;
3922 vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
3923 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
3924 vcpu->mmio_index = 0;
3925
3926 return X86EMUL_IO_NEEDED;
3927}
3928
3929int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
3930 const void *val, int bytes)
3931{
3932 int ret;
3933
3934 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
3935 if (ret < 0)
3936 return 0;
3937 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
3938 return 1;
3939}
3940
3941static int emulator_write_emulated_onepage(unsigned long addr,
3942 const void *val,
3943 unsigned int bytes,
3944 struct x86_exception *exception,
3945 struct kvm_vcpu *vcpu)
3946{
3947 gpa_t gpa;
3948 int handled;
3949
3950 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
3951
3952 if (gpa == UNMAPPED_GVA)
3953 return X86EMUL_PROPAGATE_FAULT;
3954
	/* For APIC access vmexit */
3956 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3957 goto mmio;
3958
3959 if (emulator_write_phys(vcpu, gpa, val, bytes))
3960 return X86EMUL_CONTINUE;
3961
3962mmio:
3963 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
3964
	/*
	 * Is this MMIO handled locally?
	 */
3967 handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
3968 if (handled == bytes)
3969 return X86EMUL_CONTINUE;
3970
3971 gpa += handled;
3972 bytes -= handled;
3973 val += handled;
3974
3975 vcpu->mmio_needed = 1;
3976 memcpy(vcpu->mmio_data, val, bytes);
3977 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3978 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3979 vcpu->mmio_size = bytes;
3980 vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
3981 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
3982 memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
3983 vcpu->mmio_index = 0;
3984
3985 return X86EMUL_CONTINUE;
3986}
3987
3988int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
3989 unsigned long addr,
3990 const void *val,
3991 unsigned int bytes,
3992 struct x86_exception *exception)
3993{
3994 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3995
	/* Crossing a page boundary? */
3997 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3998 int rc, now;
3999
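		/* -addr & ~PAGE_MASK is the number of bytes left in this page */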
4000 now = -addr & ~PAGE_MASK;
4001 rc = emulator_write_emulated_onepage(addr, val, now, exception,
4002 vcpu);
4003 if (rc != X86EMUL_CONTINUE)
4004 return rc;
4005 addr += now;
4006 val += now;
4007 bytes -= now;
4008 }
4009 return emulator_write_emulated_onepage(addr, val, bytes, exception,
4010 vcpu);
4011}
4012
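/*
 * Compare the guest value at ptr with *old and, if they match, store *new,
 * using a host-atomic cmpxchg of the given width.
 */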
4013#define CMPXCHG_TYPE(t, ptr, old, new) \
4014 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4015
4016#ifdef CONFIG_X86_64
4017# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4018#else
4019# define CMPXCHG64(ptr, old, new) \
4020 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4021#endif
4022
4023static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4024 unsigned long addr,
4025 const void *old,
4026 const void *new,
4027 unsigned int bytes,
4028 struct x86_exception *exception)
4029{
4030 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4031 gpa_t gpa;
4032 struct page *page;
4033 char *kaddr;
4034 bool exchanged;
4035
	/* guests cmpxchg8b have to be emulated atomically */
4037 if (bytes > 8 || (bytes & (bytes - 1)))
4038 goto emul_write;
4039
4040 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4041
4042 if (gpa == UNMAPPED_GVA ||
4043 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4044 goto emul_write;
4045
4046 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4047 goto emul_write;
4048
4049 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4050 if (is_error_page(page)) {
4051 kvm_release_page_clean(page);
4052 goto emul_write;
4053 }
4054
4055 kaddr = kmap_atomic(page, KM_USER0);
4056 kaddr += offset_in_page(gpa);
4057 switch (bytes) {
4058 case 1:
4059 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4060 break;
4061 case 2:
4062 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4063 break;
4064 case 4:
4065 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4066 break;
4067 case 8:
4068 exchanged = CMPXCHG64(kaddr, old, new);
4069 break;
4070 default:
4071 BUG();
4072 }
4073 kunmap_atomic(kaddr, KM_USER0);
4074 kvm_release_page_dirty(page);
4075
4076 if (!exchanged)
4077 return X86EMUL_CMPXCHG_FAILED;
4078
4079 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
4080
4081 return X86EMUL_CONTINUE;
4082
4083emul_write:
4084 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4085
4086 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4087}
4088
4089static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4090{
	/* TODO: String I/O for in kernel device */
4092 int r;
4093
4094 if (vcpu->arch.pio.in)
4095 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4096 vcpu->arch.pio.size, pd);
4097 else
4098 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4099 vcpu->arch.pio.port, vcpu->arch.pio.size,
4100 pd);
4101 return r;
4102}
4103
4104
4105static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4106 int size, unsigned short port, void *val,
4107 unsigned int count)
4108{
4109 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4110
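	/*
	 * If an earlier PIO-in exited to userspace, its result is already
	 * waiting in pio_data; complete that access instead of starting a
	 * new one.
	 */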
4111 if (vcpu->arch.pio.count)
4112 goto data_avail;
4113
4114 trace_kvm_pio(0, port, size, count);
4115
4116 vcpu->arch.pio.port = port;
4117 vcpu->arch.pio.in = 1;
4118 vcpu->arch.pio.count = count;
4119 vcpu->arch.pio.size = size;
4120
4121 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4122 data_avail:
4123 memcpy(val, vcpu->arch.pio_data, size * count);
4124 vcpu->arch.pio.count = 0;
4125 return 1;
4126 }
4127
4128 vcpu->run->exit_reason = KVM_EXIT_IO;
4129 vcpu->run->io.direction = KVM_EXIT_IO_IN;
4130 vcpu->run->io.size = size;
4131 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4132 vcpu->run->io.count = count;
4133 vcpu->run->io.port = port;
4134
4135 return 0;
4136}
4137
4138static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4139 int size, unsigned short port,
4140 const void *val, unsigned int count)
4141{
4142 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4143
4144 trace_kvm_pio(1, port, size, count);
4145
4146 vcpu->arch.pio.port = port;
4147 vcpu->arch.pio.in = 0;
4148 vcpu->arch.pio.count = count;
4149 vcpu->arch.pio.size = size;
4150
4151 memcpy(vcpu->arch.pio_data, val, size * count);
4152
4153 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4154 vcpu->arch.pio.count = 0;
4155 return 1;
4156 }
4157
4158 vcpu->run->exit_reason = KVM_EXIT_IO;
4159 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
4160 vcpu->run->io.size = size;
4161 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4162 vcpu->run->io.count = count;
4163 vcpu->run->io.port = port;
4164
4165 return 0;
4166}
4167
4168static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4169{
4170 return kvm_x86_ops->get_segment_base(vcpu, seg);
4171}
4172
4173static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4174{
4175 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4176}
4177
4178int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4179{
4180 if (!need_emulate_wbinvd(vcpu))
4181 return X86EMUL_CONTINUE;
4182
4183 if (kvm_x86_ops->has_wbinvd_exit()) {
4184 int cpu = get_cpu();
4185
4186 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4187 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4188 wbinvd_ipi, NULL, 1);
4189 put_cpu();
4190 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4191 } else
4192 wbinvd();
4193 return X86EMUL_CONTINUE;
4194}
4195EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4196
4197static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4198{
4199 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4200}
4201
4202int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4203{
4204 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4205}
4206
4207int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4208{
4210 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4211}
4212
4213static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4214{
4215 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4216}
4217
4218static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4219{
4220 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4221 unsigned long value;
4222
4223 switch (cr) {
4224 case 0:
4225 value = kvm_read_cr0(vcpu);
4226 break;
4227 case 2:
4228 value = vcpu->arch.cr2;
4229 break;
4230 case 3:
4231 value = kvm_read_cr3(vcpu);
4232 break;
4233 case 4:
4234 value = kvm_read_cr4(vcpu);
4235 break;
4236 case 8:
4237 value = kvm_get_cr8(vcpu);
4238 break;
4239 default:
4240 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4241 return 0;
4242 }
4243
4244 return value;
4245}
4246
4247static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4248{
4249 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4250 int res = 0;
4251
4252 switch (cr) {
4253 case 0:
4254 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4255 break;
4256 case 2:
4257 vcpu->arch.cr2 = val;
4258 break;
4259 case 3:
4260 res = kvm_set_cr3(vcpu, val);
4261 break;
4262 case 4:
4263 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4264 break;
4265 case 8:
4266 res = kvm_set_cr8(vcpu, val);
4267 break;
4268 default:
4269 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4270 res = -1;
4271 }
4272
4273 return res;
4274}
4275
4276static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4277{
4278 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4279}
4280
4281static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4282{
4283 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4284}
4285
4286static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4287{
4288 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4289}
4290
4291static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4292{
4293 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4294}
4295
4296static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4297{
4298 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4299}
4300
4301static unsigned long emulator_get_cached_segment_base(
4302 struct x86_emulate_ctxt *ctxt, int seg)
4303{
4304 return get_segment_base(emul_to_vcpu(ctxt), seg);
4305}
4306
4307static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4308 struct desc_struct *desc, u32 *base3,
4309 int seg)
4310{
4311 struct kvm_segment var;
4312
4313 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4314 *selector = var.selector;
4315
4316 if (var.unusable)
4317 return false;
4318
4319 if (var.g)
4320 var.limit >>= 12;
4321 set_desc_limit(desc, var.limit);
4322 set_desc_base(desc, (unsigned long)var.base);
4323#ifdef CONFIG_X86_64
4324 if (base3)
4325 *base3 = var.base >> 32;
4326#endif
4327 desc->type = var.type;
4328 desc->s = var.s;
4329 desc->dpl = var.dpl;
4330 desc->p = var.present;
4331 desc->avl = var.avl;
4332 desc->l = var.l;
4333 desc->d = var.db;
4334 desc->g = var.g;
4335
4336 return true;
4337}
4338
4339static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4340 struct desc_struct *desc, u32 base3,
4341 int seg)
4342{
4343 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4344 struct kvm_segment var;
4345
4346 var.selector = selector;
4347 var.base = get_desc_base(desc);
4348#ifdef CONFIG_X86_64
4349 var.base |= ((u64)base3) << 32;
4350#endif
4351 var.limit = get_desc_limit(desc);
4352 if (desc->g)
4353 var.limit = (var.limit << 12) | 0xfff;
	var.type = desc->type;
	var.present = desc->p;
	var.dpl = desc->dpl;
	var.db = desc->d;
	var.s = desc->s;
	var.l = desc->l;
	var.g = desc->g;
	var.avl = desc->avl;
	var.unusable = !var.present;
	var.padding = 0;

	kvm_set_segment(vcpu, &var, seg);
4368}
4369
4370static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4371 u32 msr_index, u64 *pdata)
4372{
4373 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4374}
4375
4376static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4377 u32 msr_index, u64 data)
4378{
4379 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
4380}
4381
4382static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4383{
4384 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4385}
4386
4387static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4388{
4389 preempt_disable();
4390 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
	/*
	 * CR0.TS may reflect the host fpu state rather than the guest's,
	 * so clear it explicitly: emulated fpu instructions must not
	 * fault with #NM while the guest owns the fpu.
	 */
4395 clts();
4396}
4397
4398static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4399{
4400 preempt_enable();
4401}
4402
4403static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4404 struct x86_instruction_info *info,
4405 enum x86_intercept_stage stage)
4406{
4407 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4408}
4409
4410static struct x86_emulate_ops emulate_ops = {
4411 .read_std = kvm_read_guest_virt_system,
4412 .write_std = kvm_write_guest_virt_system,
4413 .fetch = kvm_fetch_guest_virt,
4414 .read_emulated = emulator_read_emulated,
4415 .write_emulated = emulator_write_emulated,
4416 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4417 .invlpg = emulator_invlpg,
4418 .pio_in_emulated = emulator_pio_in_emulated,
4419 .pio_out_emulated = emulator_pio_out_emulated,
4420 .get_segment = emulator_get_segment,
4421 .set_segment = emulator_set_segment,
4422 .get_cached_segment_base = emulator_get_cached_segment_base,
4423 .get_gdt = emulator_get_gdt,
4424 .get_idt = emulator_get_idt,
4425 .set_gdt = emulator_set_gdt,
4426 .set_idt = emulator_set_idt,
4427 .get_cr = emulator_get_cr,
4428 .set_cr = emulator_set_cr,
4429 .cpl = emulator_get_cpl,
4430 .get_dr = emulator_get_dr,
4431 .set_dr = emulator_set_dr,
4432 .set_msr = emulator_set_msr,
4433 .get_msr = emulator_get_msr,
4434 .halt = emulator_halt,
4435 .wbinvd = emulator_wbinvd,
4436 .fix_hypercall = emulator_fix_hypercall,
4437 .get_fpu = emulator_get_fpu,
4438 .put_fpu = emulator_put_fpu,
4439 .intercept = emulator_intercept,
4440};
4441
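/*
 * Reading RAX, RSP and RIP through the register cache makes every guest
 * register available in vcpu->arch.regs (these three cover the lazily
 * synced cache groups); marking them all dirty forces whatever the
 * emulator writes there to be flushed back to the guest.
 */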
4442static void cache_all_regs(struct kvm_vcpu *vcpu)
4443{
4444 kvm_register_read(vcpu, VCPU_REGS_RAX);
4445 kvm_register_read(vcpu, VCPU_REGS_RSP);
4446 kvm_register_read(vcpu, VCPU_REGS_RIP);
4447 vcpu->arch.regs_dirty = ~0;
4448}
4449
4450static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4451{
4452 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
	/*
	 * An sti; sti sequence only blocks interrupts for the first
	 * instruction, so never re-arm a shadow that is already active;
	 * only write the new state when none of the requested bits are
	 * currently set (this also covers clearing, where mask == 0).
	 */
4460 if (!(int_shadow & mask))
4461 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4462}
4463
4464static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4465{
4466 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4467 if (ctxt->exception.vector == PF_VECTOR)
4468 kvm_propagate_fault(vcpu, &ctxt->exception);
4469 else if (ctxt->exception.error_code_valid)
4470 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4471 ctxt->exception.error_code);
4472 else
4473 kvm_queue_exception(vcpu, ctxt->exception.vector);
4474}
4475
4476static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4477{
4478 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4479 int cs_db, cs_l;
4480
	/*
	 * TODO: fix emulate.c to use guest_read/write_register
	 * instead of direct ->regs accesses, can save hundred cycles
	 * on Intel for instructions that don't read/change RSP, for
	 * example.
	 */
4487 cache_all_regs(vcpu);
4488
4489 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4490
4491 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
4492 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
4493 vcpu->arch.emulate_ctxt.mode =
4494 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4495 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
4496 ? X86EMUL_MODE_VM86 : cs_l
4497 ? X86EMUL_MODE_PROT64 : cs_db
4498 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
4499 vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
4500 memset(c, 0, sizeof(struct decode_cache));
4501 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4502 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4503}
4504
4505int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4506{
4507 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4508 int ret;
4509
4510 init_emulate_ctxt(vcpu);
4511
4512 vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
4513 vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
4514 vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip +
4515 inc_eip;
4516 ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
4517
4518 if (ret != X86EMUL_CONTINUE)
4519 return EMULATE_FAIL;
4520
4521 vcpu->arch.emulate_ctxt.eip = c->eip;
4522 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
4523 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
4524 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4525
4526 if (irq == NMI_VECTOR)
4527 vcpu->arch.nmi_pending = false;
4528 else
4529 vcpu->arch.interrupt.pending = false;
4530
4531 return EMULATE_DONE;
4532}
4533EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4534
4535static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4536{
4537 int r = EMULATE_DONE;
4538
4539 ++vcpu->stat.insn_emulation_fail;
4540 trace_kvm_emulate_insn_failed(vcpu);
4541 if (!is_guest_mode(vcpu)) {
4542 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4543 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4544 vcpu->run->internal.ndata = 0;
4545 r = EMULATE_FAIL;
4546 }
4547 kvm_queue_exception(vcpu, UD_VECTOR);
4548
4549 return r;
4550}
4551
4552static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4553{
4554 gpa_t gpa;
4555
4556 if (tdp_enabled)
4557 return false;
4558
	/*
	 * If emulation failed due to an access to a shadowed page
	 * table, try to unprotect the page and re-enter the guest so
	 * the CPU can execute the instruction natively.
	 */
4564 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4565 return true;
4566
4567 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4568
4569 if (gpa == UNMAPPED_GVA)
4570 return true;
4571
4572 if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
4573 return true;
4574
4575 return false;
4576}
4577
4578int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4579 unsigned long cr2,
4580 int emulation_type,
4581 void *insn,
4582 int insn_len)
4583{
4584 int r;
4585 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4586 bool writeback = true;
4587
4588 kvm_clear_exception_queue(vcpu);
4589
4590 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
4591 init_emulate_ctxt(vcpu);
4592 vcpu->arch.emulate_ctxt.interruptibility = 0;
4593 vcpu->arch.emulate_ctxt.have_exception = false;
4594 vcpu->arch.emulate_ctxt.perm_ok = false;
4595
4596 vcpu->arch.emulate_ctxt.only_vendor_specific_insn
4597 = emulation_type & EMULTYPE_TRAP_UD;
4598
4599 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
4600
4601 trace_kvm_emulate_insn_start(vcpu);
4602 ++vcpu->stat.insn_emulation;
4603 if (r) {
4604 if (emulation_type & EMULTYPE_TRAP_UD)
4605 return EMULATE_FAIL;
4606 if (reexecute_instruction(vcpu, cr2))
4607 return EMULATE_DONE;
4608 if (emulation_type & EMULTYPE_SKIP)
4609 return EMULATE_FAIL;
4610 return handle_emulation_failure(vcpu);
4611 }
4612 }
4613
4614 if (emulation_type & EMULTYPE_SKIP) {
4615 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
4616 return EMULATE_DONE;
4617 }
4618
	/* this is needed for the vmware backdoor interface to work since
	   it changes register values during an I/O operation */
4621 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
4622 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4623 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4624 }
4625
4626restart:
4627 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
4628
4629 if (r == EMULATION_INTERCEPTED)
4630 return EMULATE_DONE;
4631
4632 if (r == EMULATION_FAILED) {
4633 if (reexecute_instruction(vcpu, cr2))
4634 return EMULATE_DONE;
4635
4636 return handle_emulation_failure(vcpu);
4637 }
4638
4639 if (vcpu->arch.emulate_ctxt.have_exception) {
4640 inject_emulated_exception(vcpu);
4641 r = EMULATE_DONE;
4642 } else if (vcpu->arch.pio.count) {
4643 if (!vcpu->arch.pio.in)
4644 vcpu->arch.pio.count = 0;
4645 else
4646 writeback = false;
4647 r = EMULATE_DO_MMIO;
4648 } else if (vcpu->mmio_needed) {
4649 if (!vcpu->mmio_is_write)
4650 writeback = false;
4651 r = EMULATE_DO_MMIO;
4652 } else if (r == EMULATION_RESTART)
4653 goto restart;
4654 else
4655 r = EMULATE_DONE;
4656
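	/*
	 * writeback stays false while a PIO-in or MMIO read is still
	 * waiting on userspace; the emulator state is committed only
	 * once the data has arrived and emulation resumes (see
	 * complete_mmio()).
	 */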
4657 if (writeback) {
4658 toggle_interruptibility(vcpu,
4659 vcpu->arch.emulate_ctxt.interruptibility);
4660 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4661 kvm_make_request(KVM_REQ_EVENT, vcpu);
4662 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
4663 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
4664 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
4665 } else
4666 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
4667
4668 return r;
4669}
4670EXPORT_SYMBOL_GPL(x86_emulate_instruction);
4671
4672int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
4673{
4674 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
4675 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
4676 size, port, &val, 1);
4677
4678 vcpu->arch.pio.count = 0;
4679 return ret;
4680}
4681EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4682
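/*
 * Per-cpu cpu_tsc_khz tracks the current TSC frequency of each CPU;
 * tsc_bad() below zeroes it for a CPU that is going down (see the
 * CPU_DOWN_PREPARE notifier).
 */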
4683static void tsc_bad(void *info)
4684{
4685 __this_cpu_write(cpu_tsc_khz, 0);
4686}
4687
4688static void tsc_khz_changed(void *data)
4689{
4690 struct cpufreq_freqs *freq = data;
4691 unsigned long khz = 0;
4692
4693 if (data)
4694 khz = freq->new;
4695 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4696 khz = cpufreq_quick_get(raw_smp_processor_id());
4697 if (!khz)
4698 khz = tsc_khz;
4699 __this_cpu_write(cpu_tsc_khz, khz);
4700}
4701
4702static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
4703 void *data)
4704{
4705 struct cpufreq_freqs *freq = data;
4706 struct kvm *kvm;
4707 struct kvm_vcpu *vcpu;
4708 int i, send_ipi = 0;
4709
	/*
	 * We allow guests to temporarily run on slowing clocks,
	 * provided we notify them after, or to run on accelerating
	 * clocks, provided we notify them before.  Thus time never
	 * goes backwards.
	 *
	 * However, we can't atomically update the frequency of a given
	 * CPU from this function; it is merely a notifier, which can be
	 * called from any CPU.  Changing the TSC frequency at an
	 * arbitrary point in time requires each VCPU that may run on
	 * the affected CPU to recompute its kvmclock parameters.  The
	 * loop below therefore flags every matching VCPU with
	 * KVM_REQ_CLOCK_UPDATE and, if the VCPU might currently be in
	 * guest mode on another CPU, kicks it out with an IPI (see the
	 * send_ipi handling further down).
	 */
4749 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
4750 return 0;
4751 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
4752 return 0;
4753
4754 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
4755
4756 raw_spin_lock(&kvm_lock);
4757 list_for_each_entry(kvm, &vm_list, vm_list) {
4758 kvm_for_each_vcpu(i, vcpu, kvm) {
4759 if (vcpu->cpu != freq->cpu)
4760 continue;
4761 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4762 if (vcpu->cpu != smp_processor_id())
4763 send_ipi = 1;
4764 }
4765 }
4766 raw_spin_unlock(&kvm_lock);
4767
4768 if (freq->old < freq->new && send_ipi) {
		/*
		 * We upscale the frequency.  We must make sure the guest
		 * doesn't see old kvmclock values while running with the
		 * new frequency, otherwise the guest could see time go
		 * backwards.
		 *
		 * If we update the frequency for another cpu (which might
		 * be in guest context), send an interrupt to kick the cpu
		 * out of guest context.  The next time guest context is
		 * entered, kvmclock will be updated, so the guest will
		 * not see stale values.
		 */
4781 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
4782 }
4783 return 0;
4784}
4785
4786static struct notifier_block kvmclock_cpufreq_notifier_block = {
4787 .notifier_call = kvmclock_cpufreq_notifier
4788};
4789
4790static int kvmclock_cpu_notifier(struct notifier_block *nfb,
4791 unsigned long action, void *hcpu)
4792{
4793 unsigned int cpu = (unsigned long)hcpu;
4794
4795 switch (action) {
4796 case CPU_ONLINE:
4797 case CPU_DOWN_FAILED:
4798 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
4799 break;
4800 case CPU_DOWN_PREPARE:
4801 smp_call_function_single(cpu, tsc_bad, NULL, 1);
4802 break;
4803 }
4804 return NOTIFY_OK;
4805}
4806
4807static struct notifier_block kvmclock_cpu_notifier_block = {
4808 .notifier_call = kvmclock_cpu_notifier,
4809 .priority = -INT_MAX
4810};
4811
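/*
 * Derive max_tsc_khz (from the cpufreq policy on hosts without a
 * constant TSC), register the hotplug and frequency-change notifiers,
 * and seed cpu_tsc_khz on every online CPU.
 */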
4812static void kvm_timer_init(void)
4813{
4814 int cpu;
4815
4816 max_tsc_khz = tsc_khz;
4817 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
4818 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
4819#ifdef CONFIG_CPU_FREQ
4820 struct cpufreq_policy policy;
4821 memset(&policy, 0, sizeof(policy));
4822 cpu = get_cpu();
4823 cpufreq_get_policy(&policy, cpu);
4824 if (policy.cpuinfo.max_freq)
4825 max_tsc_khz = policy.cpuinfo.max_freq;
4826 put_cpu();
4827#endif
4828 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4829 CPUFREQ_TRANSITION_NOTIFIER);
4830 }
4831 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
4832 for_each_online_cpu(cpu)
4833 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
4834}
4835
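/*
 * Callbacks for the perf subsystem: current_vcpu is non-NULL only
 * between kvm_before_handle_nmi() and kvm_after_handle_nmi(), so perf
 * can attribute an NMI-time sample to the guest and recover the guest
 * RIP and CPL.
 */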
4836static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
4837
4838static int kvm_is_in_guest(void)
4839{
4840 return percpu_read(current_vcpu) != NULL;
4841}
4842
4843static int kvm_is_user_mode(void)
4844{
4845 int user_mode = 3;
4846
4847 if (percpu_read(current_vcpu))
4848 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
4849
4850 return user_mode != 0;
4851}
4852
4853static unsigned long kvm_get_guest_ip(void)
4854{
4855 unsigned long ip = 0;
4856
4857 if (percpu_read(current_vcpu))
4858 ip = kvm_rip_read(percpu_read(current_vcpu));
4859
4860 return ip;
4861}
4862
4863static struct perf_guest_info_callbacks kvm_guest_cbs = {
4864 .is_in_guest = kvm_is_in_guest,
4865 .is_user_mode = kvm_is_user_mode,
4866 .get_guest_ip = kvm_get_guest_ip,
4867};
4868
4869void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
4870{
4871 percpu_write(current_vcpu, vcpu);
4872}
4873EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
4874
4875void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
4876{
4877 percpu_write(current_vcpu, NULL);
4878}
4879EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
4880
4881int kvm_arch_init(void *opaque)
4882{
4883 int r;
4884 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
4885
4886 if (kvm_x86_ops) {
4887 printk(KERN_ERR "kvm: already loaded the other module\n");
4888 r = -EEXIST;
4889 goto out;
4890 }
4891
4892 if (!ops->cpu_has_kvm_support()) {
4893 printk(KERN_ERR "kvm: no hardware support\n");
4894 r = -EOPNOTSUPP;
4895 goto out;
4896 }
4897 if (ops->disabled_by_bios()) {
4898 printk(KERN_ERR "kvm: disabled by bios\n");
4899 r = -EOPNOTSUPP;
4900 goto out;
4901 }
4902
4903 r = kvm_mmu_module_init();
4904 if (r)
4905 goto out;
4906
4907 kvm_init_msr_list();
4908
4909 kvm_x86_ops = ops;
4910 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
4911 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
4912 PT_DIRTY_MASK, PT64_NX_MASK, 0);
4913
4914 kvm_timer_init();
4915
4916 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4917
4918 if (cpu_has_xsave)
4919 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4920
4921 return 0;
4922
4923out:
4924 return r;
4925}
4926
4927void kvm_arch_exit(void)
4928{
4929 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
4930
4931 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4932 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4933 CPUFREQ_TRANSITION_NOTIFIER);
4934 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
4935 kvm_x86_ops = NULL;
4936 kvm_mmu_module_exit();
4937}
4938
4939int kvm_emulate_halt(struct kvm_vcpu *vcpu)
4940{
4941 ++vcpu->stat.halt_exits;
4942 if (irqchip_in_kernel(vcpu->kvm)) {
4943 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
4944 return 1;
4945 } else {
4946 vcpu->run->exit_reason = KVM_EXIT_HLT;
4947 return 0;
4948 }
4949}
4950EXPORT_SYMBOL_GPL(kvm_emulate_halt);
4951
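/*
 * Outside long mode hypercall arguments are only 32 bits wide, so a
 * guest physical address arrives split across two argument registers.
 */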
4952static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
4953 unsigned long a1)
4954{
4955 if (is_long_mode(vcpu))
4956 return a0;
4957 else
4958 return a0 | ((gpa_t)a1 << 32);
4959}
4960
4961int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
4962{
4963 u64 param, ingpa, outgpa, ret;
4964 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
4965 bool fast, longmode;
4966 int cs_db, cs_l;
4967
	/*
	 * A hypercall issued from non-zero CPL or from real mode
	 * generates #UD, per the Hyper-V spec.
	 */
4972 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
4973 kvm_queue_exception(vcpu, UD_VECTOR);
4974 return 0;
4975 }
4976
4977 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4978 longmode = is_long_mode(vcpu) && cs_l == 1;
4979
4980 if (!longmode) {
4981 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
4982 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
4983 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
4984 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
4985 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
4986 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
4987 }
4988#ifdef CONFIG_X86_64
4989 else {
4990 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
4991 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
4992 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
4993 }
4994#endif
4995
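	/*
	 * Unpack the hypercall input value: call code in bits 15:0, the
	 * "fast" flag in bit 16, rep count in bits 43:32 and the rep
	 * start index in bits 59:48.
	 */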
4996 code = param & 0xffff;
4997 fast = (param >> 16) & 0x1;
4998 rep_cnt = (param >> 32) & 0xfff;
4999 rep_idx = (param >> 48) & 0xfff;
5000
5001 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5002
5003 switch (code) {
5004 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5005 kvm_vcpu_on_spin(vcpu);
5006 break;
5007 default:
5008 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5009 break;
5010 }
5011
5012 ret = res | (((u64)rep_done & 0xfff) << 32);
5013 if (longmode) {
5014 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5015 } else {
5016 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5017 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5018 }
5019
5020 return 1;
5021}
5022
5023int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5024{
5025 unsigned long nr, a0, a1, a2, a3, ret;
5026 int r = 1;
5027
5028 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5029 return kvm_hv_hypercall(vcpu);
5030
5031 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5032 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5033 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5034 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5035 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5036
5037 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5038
5039 if (!is_long_mode(vcpu)) {
5040 nr &= 0xFFFFFFFF;
5041 a0 &= 0xFFFFFFFF;
5042 a1 &= 0xFFFFFFFF;
5043 a2 &= 0xFFFFFFFF;
5044 a3 &= 0xFFFFFFFF;
5045 }
5046
5047 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5048 ret = -KVM_EPERM;
5049 goto out;
5050 }
5051
5052 switch (nr) {
5053 case KVM_HC_VAPIC_POLL_IRQ:
5054 ret = 0;
5055 break;
5056 case KVM_HC_MMU_OP:
5057 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
5058 break;
5059 default:
5060 ret = -KVM_ENOSYS;
5061 break;
5062 }
5063out:
5064 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5065 ++vcpu->stat.hypercalls;
5066 return r;
5067}
5068EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5069
5070int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5071{
5072 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5073 char instruction[3];
5074 unsigned long rip = kvm_rip_read(vcpu);
5075
	/*
	 * Blow out the MMU to ensure that no other VCPU has an active
	 * mapping to write to the instruction stream, so that the
	 * patched hypercall appears atomically across all VCPUs.
	 */
5081 kvm_mmu_zap_all(vcpu->kvm);
5082
5083 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5084
5085 return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
5086 rip, instruction, 3, NULL);
5087}
5088
5089static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
5090{
5091 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
5092 int j, nent = vcpu->arch.cpuid_nent;
5093
5094 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
	/* when no next entry is found, the current entry[i] is reselected */
5096 for (j = i + 1; ; j = (j + 1) % nent) {
5097 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
5098 if (ej->function == e->function) {
5099 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
5100 return j;
5101 }
5102 }
	return 0; /* silence gcc, even though control never reaches here */
5104}
5105
/* find an entry with matching function, matching index (if needed), and that
 * should be read next (if it's stateful) */
5108static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
5109 u32 function, u32 index)
5110{
5111 if (e->function != function)
5112 return 0;
5113 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
5114 return 0;
5115 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
5116 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
5117 return 0;
5118 return 1;
5119}
5120
5121struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
5122 u32 function, u32 index)
5123{
5124 int i;
5125 struct kvm_cpuid_entry2 *best = NULL;
5126
5127 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
5128 struct kvm_cpuid_entry2 *e;
5129
5130 e = &vcpu->arch.cpuid_entries[i];
5131 if (is_matching_cpuid_entry(e, function, index)) {
5132 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
5133 move_to_next_stateful_cpuid_entry(vcpu, i);
5134 best = e;
5135 break;
5136 }
5137 }
5138 return best;
5139}
5140EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
5141
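/*
 * Physical address width reported to the guest: CPUID leaf 0x80000008
 * when the guest's CPUID exposes it, otherwise the architectural
 * default of 36 bits.
 */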
5142int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
5143{
5144 struct kvm_cpuid_entry2 *best;
5145
5146 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
5147 if (!best || best->eax < 0x80000008)
5148 goto not_found;
5149 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
5150 if (best)
5151 return best->eax & 0xff;
5152not_found:
5153 return 36;
5154}
5155
/*
 * If no match is found, check whether we exceed the vCPU's limit
 * and return the content of the highest valid _standard_ leaf instead.
 * This is to satisfy the CPUID specification.
 */
5161static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
5162 u32 function, u32 index)
5163{
5164 struct kvm_cpuid_entry2 *maxlevel;
5165
5166 maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
5167 if (!maxlevel || maxlevel->eax >= function)
5168 return NULL;
5169 if (function & 0x80000000) {
5170 maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
5171 if (!maxlevel)
5172 return NULL;
5173 }
5174 return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
5175}
5176
5177void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
5178{
5179 u32 function, index;
5180 struct kvm_cpuid_entry2 *best;
5181
5182 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
5183 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
5184 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
5185 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
5186 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
5187 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
5188 best = kvm_find_cpuid_entry(vcpu, function, index);
5189
5190 if (!best)
5191 best = check_cpuid_limit(vcpu, function, index);
5192
5193 if (best) {
5194 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
5195 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
5196 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
5197 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
5198 }
5199 kvm_x86_ops->skip_emulated_instruction(vcpu);
5200 trace_kvm_cpuid(function,
5201 kvm_register_read(vcpu, VCPU_REGS_RAX),
5202 kvm_register_read(vcpu, VCPU_REGS_RBX),
5203 kvm_register_read(vcpu, VCPU_REGS_RCX),
5204 kvm_register_read(vcpu, VCPU_REGS_RDX));
5205}
5206EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
5207
/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
5214static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5215{
5216 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5217 vcpu->run->request_interrupt_window &&
5218 kvm_arch_interrupt_allowed(vcpu));
5219}
5220
5221static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5222{
5223 struct kvm_run *kvm_run = vcpu->run;
5224
5225 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5226 kvm_run->cr8 = kvm_get_cr8(vcpu);
5227 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5228 if (irqchip_in_kernel(vcpu->kvm))
5229 kvm_run->ready_for_interrupt_injection = 1;
5230 else
5231 kvm_run->ready_for_interrupt_injection =
5232 kvm_arch_interrupt_allowed(vcpu) &&
5233 !kvm_cpu_has_interrupt(vcpu) &&
5234 !kvm_event_needs_reinjection(vcpu);
5235}
5236
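/*
 * Pin the guest page backing the virtual-APIC area for the duration of
 * __vcpu_run(); vapic_exit() releases it and marks it dirty.
 */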
5237static void vapic_enter(struct kvm_vcpu *vcpu)
5238{
5239 struct kvm_lapic *apic = vcpu->arch.apic;
5240 struct page *page;
5241
5242 if (!apic || !apic->vapic_addr)
5243 return;
5244
5245 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5246
5247 vcpu->arch.apic->vapic_page = page;
5248}
5249
5250static void vapic_exit(struct kvm_vcpu *vcpu)
5251{
5252 struct kvm_lapic *apic = vcpu->arch.apic;
5253 int idx;
5254
5255 if (!apic || !apic->vapic_addr)
5256 return;
5257
5258 idx = srcu_read_lock(&vcpu->kvm->srcu);
5259 kvm_release_page_dirty(apic->vapic_page);
5260 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5261 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5262}
5263
5264static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5265{
5266 int max_irr, tpr;
5267
5268 if (!kvm_x86_ops->update_cr8_intercept)
5269 return;
5270
5271 if (!vcpu->arch.apic)
5272 return;
5273
5274 if (!vcpu->arch.apic->vapic_addr)
5275 max_irr = kvm_lapic_find_highest_irr(vcpu);
5276 else
5277 max_irr = -1;
5278
5279 if (max_irr != -1)
5280 max_irr >>= 4;
5281
5282 tpr = kvm_lapic_get_cr8(vcpu);
5283
5284 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5285}
5286
5287static void inject_pending_event(struct kvm_vcpu *vcpu)
5288{
	/* try to reinject previous events if any */
5290 if (vcpu->arch.exception.pending) {
5291 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5292 vcpu->arch.exception.has_error_code,
5293 vcpu->arch.exception.error_code);
5294 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5295 vcpu->arch.exception.has_error_code,
5296 vcpu->arch.exception.error_code,
5297 vcpu->arch.exception.reinject);
5298 return;
5299 }
5300
5301 if (vcpu->arch.nmi_injected) {
5302 kvm_x86_ops->set_nmi(vcpu);
5303 return;
5304 }
5305
5306 if (vcpu->arch.interrupt.pending) {
5307 kvm_x86_ops->set_irq(vcpu);
5308 return;
5309 }
5310
	/* try to inject new event if pending */
5312 if (vcpu->arch.nmi_pending) {
5313 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5314 vcpu->arch.nmi_pending = false;
5315 vcpu->arch.nmi_injected = true;
5316 kvm_x86_ops->set_nmi(vcpu);
5317 }
5318 } else if (kvm_cpu_has_interrupt(vcpu)) {
5319 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5320 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5321 false);
5322 kvm_x86_ops->set_irq(vcpu);
5323 }
5324 }
5325}
5326
5327static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
5328{
5329 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
5330 !vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
5332 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
5333 vcpu->guest_xcr0_loaded = 1;
5334 }
5335}
5336
5337static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
5338{
5339 if (vcpu->guest_xcr0_loaded) {
5340 if (vcpu->arch.xcr0 != host_xcr0)
5341 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
5342 vcpu->guest_xcr0_loaded = 0;
5343 }
5344}
5345
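/*
 * Run a single guest entry/exit cycle.  Returns a positive value if
 * the __vcpu_run() loop should keep going, and zero or a negative
 * error if the ioctl needs to drop back to userspace.
 */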
5346static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5347{
5348 int r;
5349 bool nmi_pending;
5350 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5351 vcpu->run->request_interrupt_window;
5352
5353 if (vcpu->requests) {
5354 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5355 kvm_mmu_unload(vcpu);
5356 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5357 __kvm_migrate_timers(vcpu);
5358 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5359 r = kvm_guest_time_update(vcpu);
5360 if (unlikely(r))
5361 goto out;
5362 }
5363 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5364 kvm_mmu_sync_roots(vcpu);
5365 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5366 kvm_x86_ops->tlb_flush(vcpu);
5367 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5368 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5369 r = 0;
5370 goto out;
5371 }
5372 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5373 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5374 r = 0;
5375 goto out;
5376 }
5377 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5378 vcpu->fpu_active = 0;
5379 kvm_x86_ops->fpu_deactivate(vcpu);
5380 }
5381 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* Page is swapped out.  Do synthetic halt */
5383 vcpu->arch.apf.halted = true;
5384 r = 1;
5385 goto out;
5386 }
5387 }
5388
5389 r = kvm_mmu_reload(vcpu);
5390 if (unlikely(r))
5391 goto out;
5392
	/*
	 * Read nmi_pending only once; an NMI queued after this point
	 * also raises KVM_REQ_EVENT, which cancels the guest entry
	 * below, so basing the NMI-window decision on this snapshot
	 * cannot lose an NMI.
	 */
5399 nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
5400
5401 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5402 inject_pending_event(vcpu);

		/* enable NMI/IRQ window open exits if needed */
5405 if (nmi_pending)
5406 kvm_x86_ops->enable_nmi_window(vcpu);
5407 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
5408 kvm_x86_ops->enable_irq_window(vcpu);
5409
5410 if (kvm_lapic_enabled(vcpu)) {
5411 update_cr8_intercept(vcpu);
5412 kvm_lapic_sync_to_vapic(vcpu);
5413 }
5414 }
5415
5416 preempt_disable();
5417
5418 kvm_x86_ops->prepare_guest_switch(vcpu);
5419 if (vcpu->fpu_active)
5420 kvm_load_guest_fpu(vcpu);
5421 kvm_load_guest_xcr0(vcpu);
5422
5423 vcpu->mode = IN_GUEST_MODE;
5424
	/* We should set ->mode before we check ->requests,
	 * see the comment in make_all_cpus_request.
	 */
5428 smp_mb();
5429
5430 local_irq_disable();
5431
5432 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
5433 || need_resched() || signal_pending(current)) {
5434 vcpu->mode = OUTSIDE_GUEST_MODE;
5435 smp_wmb();
5436 local_irq_enable();
5437 preempt_enable();
5438 kvm_x86_ops->cancel_injection(vcpu);
5439 r = 1;
5440 goto out;
5441 }
5442
5443 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5444
5445 kvm_guest_enter();
5446
5447 if (unlikely(vcpu->arch.switch_db_regs)) {
5448 set_debugreg(0, 7);
5449 set_debugreg(vcpu->arch.eff_db[0], 0);
5450 set_debugreg(vcpu->arch.eff_db[1], 1);
5451 set_debugreg(vcpu->arch.eff_db[2], 2);
5452 set_debugreg(vcpu->arch.eff_db[3], 3);
5453 }
5454
5455 trace_kvm_entry(vcpu->vcpu_id);
5456 kvm_x86_ops->run(vcpu);
5457
	/*
	 * If the guest has used debug registers, at least dr7 will be
	 * disabled while returning to the host.  If we don't have active
	 * breakpoints in the host, we don't care about the messed up
	 * debug address registers.  But if we have some of them active,
	 * restore the old state.
	 */
5465 if (hw_breakpoint_active())
5466 hw_breakpoint_restore();
5467
5468 kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
5469
5470 vcpu->mode = OUTSIDE_GUEST_MODE;
5471 smp_wmb();
5472 local_irq_enable();
5473
5474 ++vcpu->stat.exits;
5475
	/*
	 * We must have an instruction between local_irq_enable() and
	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
	 * the interrupt shadow.  The stat.exits increment will do
	 * nicely.  But we need to prevent reordering, hence this
	 * barrier():
	 */
5482 barrier();
5483
5484 kvm_guest_exit();
5485
5486 preempt_enable();
5487
5488 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5489
	/*
	 * Profile KVM exit RIPs:
	 */
5493 if (unlikely(prof_on == KVM_PROFILING)) {
5494 unsigned long rip = kvm_rip_read(vcpu);
5495 profile_hit(KVM_PROFILING, (void *)rip);
5496 }
5497
5498
5499 kvm_lapic_sync_from_vapic(vcpu);
5500
5501 r = kvm_x86_ops->handle_exit(vcpu);
5502out:
5503 return r;
5504}
5505
5506
5507static int __vcpu_run(struct kvm_vcpu *vcpu)
5508{
5509 int r;
5510 struct kvm *kvm = vcpu->kvm;
5511
5512 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
5513 pr_debug("vcpu %d received sipi with vector # %x\n",
5514 vcpu->vcpu_id, vcpu->arch.sipi_vector);
5515 kvm_lapic_reset(vcpu);
5516 r = kvm_arch_vcpu_reset(vcpu);
5517 if (r)
5518 return r;
5519 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5520 }
5521
5522 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5523 vapic_enter(vcpu);
5524
5525 r = 1;
5526 while (r > 0) {
5527 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
5528 !vcpu->arch.apf.halted)
5529 r = vcpu_enter_guest(vcpu);
5530 else {
5531 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5532 kvm_vcpu_block(vcpu);
5533 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5534 if (kvm_check_request(KVM_REQ_UNHALT, vcpu))
5535 {
5536 switch(vcpu->arch.mp_state) {
5537 case KVM_MP_STATE_HALTED:
5538 vcpu->arch.mp_state =
5539 KVM_MP_STATE_RUNNABLE;
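					/* fall through */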
5540 case KVM_MP_STATE_RUNNABLE:
5541 vcpu->arch.apf.halted = false;
5542 break;
5543 case KVM_MP_STATE_SIPI_RECEIVED:
5544 default:
5545 r = -EINTR;
5546 break;
5547 }
5548 }
5549 }
5550
5551 if (r <= 0)
5552 break;
5553
5554 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
5555 if (kvm_cpu_has_pending_timer(vcpu))
5556 kvm_inject_pending_timer_irqs(vcpu);
5557
5558 if (dm_request_for_irq_injection(vcpu)) {
5559 r = -EINTR;
5560 vcpu->run->exit_reason = KVM_EXIT_INTR;
5561 ++vcpu->stat.request_irq_exits;
5562 }
5563
5564 kvm_check_async_pf_completion(vcpu);
5565
5566 if (signal_pending(current)) {
5567 r = -EINTR;
5568 vcpu->run->exit_reason = KVM_EXIT_INTR;
5569 ++vcpu->stat.signal_exits;
5570 }
5571 if (need_resched()) {
5572 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5573 kvm_resched(vcpu);
5574 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5575 }
5576 }
5577
5578 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5579
5580 vapic_exit(vcpu);
5581
5582 return r;
5583}
5584
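/*
 * Feed the data of a completed userspace MMIO or PIO-in exit back into
 * the emulator, eight bytes at a time for MMIO, and resume emulation
 * without decoding the instruction again.  Returns 1 when emulation
 * can proceed, 0 when another exit to userspace is required.
 */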
5585static int complete_mmio(struct kvm_vcpu *vcpu)
5586{
5587 struct kvm_run *run = vcpu->run;
5588 int r;
5589
5590 if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
5591 return 1;
5592
5593 if (vcpu->mmio_needed) {
5594 vcpu->mmio_needed = 0;
5595 if (!vcpu->mmio_is_write)
5596 memcpy(vcpu->mmio_data + vcpu->mmio_index,
5597 run->mmio.data, 8);
5598 vcpu->mmio_index += 8;
5599 if (vcpu->mmio_index < vcpu->mmio_size) {
5600 run->exit_reason = KVM_EXIT_MMIO;
5601 run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
5602 memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
5603 run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
5604 run->mmio.is_write = vcpu->mmio_is_write;
5605 vcpu->mmio_needed = 1;
5606 return 0;
5607 }
5608 if (vcpu->mmio_is_write)
5609 return 1;
5610 vcpu->mmio_read_completed = 1;
5611 }
5612 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5613 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5614 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5615 if (r != EMULATE_DONE)
5616 return 0;
5617 return 1;
5618}
5619
5620int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5621{
5622 int r;
5623 sigset_t sigsaved;
5624
5625 if (!tsk_used_math(current) && init_fpu(current))
5626 return -ENOMEM;
5627
5628 if (vcpu->sigset_active)
5629 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
5630
5631 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
5632 kvm_vcpu_block(vcpu);
5633 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
5634 r = -EAGAIN;
5635 goto out;
5636 }
5637
	/* re-sync apic's tpr */
5639 if (!irqchip_in_kernel(vcpu->kvm)) {
5640 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
5641 r = -EINVAL;
5642 goto out;
5643 }
5644 }
5645
5646 r = complete_mmio(vcpu);
5647 if (r <= 0)
5648 goto out;
5649
5650 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
5651 kvm_register_write(vcpu, VCPU_REGS_RAX,
5652 kvm_run->hypercall.ret);
5653
5654 r = __vcpu_run(vcpu);
5655
5656out:
5657 post_kvm_run_save(vcpu);
5658 if (vcpu->sigset_active)
5659 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
5660
5661 return r;
5662}
5663
5664int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5665{
5666 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * We are here if userspace calls get_regs() in the middle
		 * of instruction emulation.  Register state needs to be
		 * copied back from the emulation context to the vcpu.
		 * Userspace shouldn't usually do that, but some badly
		 * designed PV devices (the vmware backdoor interface)
		 * need it to work.
		 */
5674 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
5675 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
5676 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5677 }
5678 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5679 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5680 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5681 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5682 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
5683 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
5684 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5685 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5686#ifdef CONFIG_X86_64
5687 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
5688 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
5689 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
5690 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
5691 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
5692 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
5693 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
5694 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
5695#endif
5696
5697 regs->rip = kvm_rip_read(vcpu);
5698 regs->rflags = kvm_get_rflags(vcpu);
5699
5700 return 0;
5701}
5702
5703int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5704{
5705 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
5706 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5707
5708 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
5709 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
5710 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
5711 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
5712 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
5713 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
5714 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
5715 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
5716#ifdef CONFIG_X86_64
5717 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
5718 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
5719 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
5720 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
5721 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
5722 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
5723 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
5724 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
5725#endif
5726
5727 kvm_rip_write(vcpu, regs->rip);
5728 kvm_set_rflags(vcpu, regs->rflags);
5729
5730 vcpu->arch.exception.pending = false;
5731
5732 kvm_make_request(KVM_REQ_EVENT, vcpu);
5733
5734 return 0;
5735}
5736
5737void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
5738{
5739 struct kvm_segment cs;
5740
5741 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
5742 *db = cs.db;
5743 *l = cs.l;
5744}
5745EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
5746
5747int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
5748 struct kvm_sregs *sregs)
5749{
5750 struct desc_ptr dt;
5751
5752 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5753 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5754 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5755 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5756 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5757 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5758
5759 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5760 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5761
5762 kvm_x86_ops->get_idt(vcpu, &dt);
5763 sregs->idt.limit = dt.size;
5764 sregs->idt.base = dt.address;
5765 kvm_x86_ops->get_gdt(vcpu, &dt);
5766 sregs->gdt.limit = dt.size;
5767 sregs->gdt.base = dt.address;
5768
5769 sregs->cr0 = kvm_read_cr0(vcpu);
5770 sregs->cr2 = vcpu->arch.cr2;
5771 sregs->cr3 = kvm_read_cr3(vcpu);
5772 sregs->cr4 = kvm_read_cr4(vcpu);
5773 sregs->cr8 = kvm_get_cr8(vcpu);
5774 sregs->efer = vcpu->arch.efer;
5775 sregs->apic_base = kvm_get_apic_base(vcpu);
5776
5777 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
5778
5779 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
5780 set_bit(vcpu->arch.interrupt.nr,
5781 (unsigned long *)sregs->interrupt_bitmap);
5782
5783 return 0;
5784}
5785
5786int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
5787 struct kvm_mp_state *mp_state)
5788{
5789 mp_state->mp_state = vcpu->arch.mp_state;
5790 return 0;
5791}
5792
5793int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
5794 struct kvm_mp_state *mp_state)
5795{
5796 vcpu->arch.mp_state = mp_state->mp_state;
5797 kvm_make_request(KVM_REQ_EVENT, vcpu);
5798 return 0;
5799}
5800
5801int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
5802 bool has_error_code, u32 error_code)
5803{
5804 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
5805 int ret;
5806
5807 init_emulate_ctxt(vcpu);
5808
5809 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
5810 tss_selector, reason, has_error_code,
5811 error_code);
5812
5813 if (ret)
5814 return EMULATE_FAIL;
5815
5816 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
5817 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
5818 kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
5819 kvm_make_request(KVM_REQ_EVENT, vcpu);
5820 return EMULATE_DONE;
5821}
5822EXPORT_SYMBOL_GPL(kvm_task_switch);
5823
5824int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5825 struct kvm_sregs *sregs)
5826{
5827 int mmu_reset_needed = 0;
5828 int pending_vec, max_bits, idx;
5829 struct desc_ptr dt;
5830
5831 dt.size = sregs->idt.limit;
5832 dt.address = sregs->idt.base;
5833 kvm_x86_ops->set_idt(vcpu, &dt);
5834 dt.size = sregs->gdt.limit;
5835 dt.address = sregs->gdt.base;
5836 kvm_x86_ops->set_gdt(vcpu, &dt);
5837
5838 vcpu->arch.cr2 = sregs->cr2;
5839 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
5840 vcpu->arch.cr3 = sregs->cr3;
5841 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
5842
5843 kvm_set_cr8(vcpu, sregs->cr8);
5844
5845 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
5846 kvm_x86_ops->set_efer(vcpu, sregs->efer);
5847 kvm_set_apic_base(vcpu, sregs->apic_base);
5848
5849 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
5850 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
5851 vcpu->arch.cr0 = sregs->cr0;
5852
5853 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5854 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5855 if (sregs->cr4 & X86_CR4_OSXSAVE)
5856 update_cpuid(vcpu);
5857
5858 idx = srcu_read_lock(&vcpu->kvm->srcu);
5859 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5860 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
5861 mmu_reset_needed = 1;
5862 }
5863 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5864
5865 if (mmu_reset_needed)
5866 kvm_mmu_reset_context(vcpu);
5867
5868 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
5869 pending_vec = find_first_bit(
5870 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
5871 if (pending_vec < max_bits) {
5872 kvm_queue_interrupt(vcpu, pending_vec, false);
5873 pr_debug("Set back pending irq %d\n", pending_vec);
5874 }
5875
5876 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5877 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5878 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5879 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5880 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5881 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5882
5883 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5884 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5885
5886 update_cr8_intercept(vcpu);
5887
	/* Older userspace won't unhalt the vcpu on reset. */
5889 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
5890 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
5891 !is_protmode(vcpu))
5892 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5893
5894 kvm_make_request(KVM_REQ_EVENT, vcpu);
5895
5896 return 0;
5897}
5898
5899int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5900 struct kvm_guest_debug *dbg)
5901{
5902 unsigned long rflags;
5903 int i, r;
5904
5905 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
5906 r = -EBUSY;
5907 if (vcpu->arch.exception.pending)
5908 goto out;
5909 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5910 kvm_queue_exception(vcpu, DB_VECTOR);
5911 else
5912 kvm_queue_exception(vcpu, BP_VECTOR);
5913 }
5914
	/*
	 * Read rflags as long as potentially injected trace flags are
	 * still filtered out.
	 */
5919 rflags = kvm_get_rflags(vcpu);
5920
5921 vcpu->guest_debug = dbg->control;
5922 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
5923 vcpu->guest_debug = 0;
5924
5925 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5926 for (i = 0; i < KVM_NR_DB_REGS; ++i)
5927 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
5928 vcpu->arch.switch_db_regs =
5929 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
5930 } else {
5931 for (i = 0; i < KVM_NR_DB_REGS; i++)
5932 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
5933 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5934 }
5935
5936 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5937 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5938 get_segment_base(vcpu, VCPU_SREG_CS);
5939
	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags.
	 */
5944 kvm_set_rflags(vcpu, rflags);
5945
5946 kvm_x86_ops->set_guest_debug(vcpu, dbg);
5947
5948 r = 0;
5949
5950out:
5951
5952 return r;
5953}
5954
/*
 * Translate a guest virtual address to a guest physical address.
 */
5958int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5959 struct kvm_translation *tr)
5960{
5961 unsigned long vaddr = tr->linear_address;
5962 gpa_t gpa;
5963 int idx;
5964
5965 idx = srcu_read_lock(&vcpu->kvm->srcu);
5966 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
5967 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5968 tr->physical_address = gpa;
5969 tr->valid = gpa != UNMAPPED_GVA;
5970 tr->writeable = 1;
5971 tr->usermode = 0;
5972
5973 return 0;
5974}
5975
5976int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5977{
5978 struct i387_fxsave_struct *fxsave =
5979 &vcpu->arch.guest_fpu.state->fxsave;
5980
5981 memcpy(fpu->fpr, fxsave->st_space, 128);
5982 fpu->fcw = fxsave->cwd;
5983 fpu->fsw = fxsave->swd;
5984 fpu->ftwx = fxsave->twd;
5985 fpu->last_opcode = fxsave->fop;
5986 fpu->last_ip = fxsave->rip;
5987 fpu->last_dp = fxsave->rdp;
5988 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
5989
5990 return 0;
5991}
5992
5993int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5994{
5995 struct i387_fxsave_struct *fxsave =
5996 &vcpu->arch.guest_fpu.state->fxsave;
5997
5998 memcpy(fxsave->st_space, fpu->fpr, 128);
5999 fxsave->cwd = fpu->fcw;
6000 fxsave->swd = fpu->fsw;
6001 fxsave->twd = fpu->ftwx;
6002 fxsave->fop = fpu->last_opcode;
6003 fxsave->rip = fpu->last_ip;
6004 fxsave->rdp = fpu->last_dp;
6005 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
6006
6007 return 0;
6008}
6009
6010int fx_init(struct kvm_vcpu *vcpu)
6011{
6012 int err;
6013
6014 err = fpu_alloc(&vcpu->arch.guest_fpu);
6015 if (err)
6016 return err;
6017
6018 fpu_finit(&vcpu->arch.guest_fpu);
6019
	/*
	 * Ensure guest xcr0 is valid for loading
	 */
6023 vcpu->arch.xcr0 = XSTATE_FP;
6024
6025 vcpu->arch.cr0 |= X86_CR0_ET;
6026
6027 return 0;
6028}
6029EXPORT_SYMBOL_GPL(fx_init);
6030
6031static void fx_free(struct kvm_vcpu *vcpu)
6032{
6033 fpu_free(&vcpu->arch.guest_fpu);
6034}
6035
6036void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6037{
6038 if (vcpu->guest_fpu_loaded)
6039 return;
6040
	/*
	 * Restore all states the guest may use, and assume the host
	 * uses all available bits.  The guest's xcr0 is loaded later,
	 * in kvm_load_guest_xcr0().
	 */
6046 kvm_put_guest_xcr0(vcpu);
6047 vcpu->guest_fpu_loaded = 1;
6048 unlazy_fpu(current);
6049 fpu_restore_checking(&vcpu->arch.guest_fpu);
6050 trace_kvm_fpu(1);
6051}
6052
6053void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6054{
6055 kvm_put_guest_xcr0(vcpu);
6056
6057 if (!vcpu->guest_fpu_loaded)
6058 return;
6059
6060 vcpu->guest_fpu_loaded = 0;
6061 fpu_save_init(&vcpu->arch.guest_fpu);
6062 ++vcpu->stat.fpu_reload;
6063 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6064 trace_kvm_fpu(0);
6065}
6066
6067void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6068{
6069 kvmclock_reset(vcpu);
6070
6071 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6072 fx_free(vcpu);
6073 kvm_x86_ops->vcpu_free(vcpu);
6074}
6075
6076struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6077 unsigned int id)
6078{
6079 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6080 printk_once(KERN_WARNING
6081 "kvm: SMP vm created on host with unstable TSC; "
6082 "guest TSC will not be reliable\n");
6083 return kvm_x86_ops->vcpu_create(kvm, id);
6084}
6085
6086int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6087{
6088 int r;
6089
6090 vcpu->arch.mtrr_state.have_fixed = 1;
6091 vcpu_load(vcpu);
6092 r = kvm_arch_vcpu_reset(vcpu);
6093 if (r == 0)
6094 r = kvm_mmu_setup(vcpu);
6095 vcpu_put(vcpu);
6096 if (r < 0)
6097 goto free_vcpu;
6098
6099 return 0;
6100free_vcpu:
6101 kvm_x86_ops->vcpu_free(vcpu);
6102 return r;
6103}
6104
6105void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6106{
6107 vcpu->arch.apf.msr_val = 0;
6108
6109 vcpu_load(vcpu);
6110 kvm_mmu_unload(vcpu);
6111 vcpu_put(vcpu);
6112
6113 fx_free(vcpu);
6114 kvm_x86_ops->vcpu_free(vcpu);
6115}
6116
6117int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
6118{
6119 vcpu->arch.nmi_pending = false;
6120 vcpu->arch.nmi_injected = false;
6121
6122 vcpu->arch.switch_db_regs = 0;
6123 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6124 vcpu->arch.dr6 = DR6_FIXED_1;
6125 vcpu->arch.dr7 = DR7_FIXED_1;
6126
6127 kvm_make_request(KVM_REQ_EVENT, vcpu);
6128 vcpu->arch.apf.msr_val = 0;
6129
6130 kvmclock_reset(vcpu);
6131
6132 kvm_clear_async_pf_completion_queue(vcpu);
6133 kvm_async_pf_hash_reset(vcpu);
6134 vcpu->arch.apf.halted = false;
6135
6136 return kvm_x86_ops->vcpu_reset(vcpu);
6137}
6138
6139int kvm_arch_hardware_enable(void *garbage)
6140{
6141 struct kvm *kvm;
6142 struct kvm_vcpu *vcpu;
6143 int i;
6144
6145 kvm_shared_msr_cpu_online();
6146 list_for_each_entry(kvm, &vm_list, vm_list)
6147 kvm_for_each_vcpu(i, vcpu, kvm)
6148 if (vcpu->cpu == smp_processor_id())
6149 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6150 return kvm_x86_ops->hardware_enable(garbage);
6151}
6152
6153void kvm_arch_hardware_disable(void *garbage)
6154{
6155 kvm_x86_ops->hardware_disable(garbage);
6156 drop_user_return_notifiers(garbage);
6157}
6158
6159int kvm_arch_hardware_setup(void)
6160{
6161 return kvm_x86_ops->hardware_setup();
6162}
6163
6164void kvm_arch_hardware_unsetup(void)
6165{
6166 kvm_x86_ops->hardware_unsetup();
6167}
6168
6169void kvm_arch_check_processor_compat(void *rtn)
6170{
6171 kvm_x86_ops->check_processor_compatibility(rtn);
6172}
6173
6174int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6175{
6176 struct page *page;
6177 struct kvm *kvm;
6178 int r;
6179
6180 BUG_ON(vcpu->kvm == NULL);
6181 kvm = vcpu->kvm;
6182
6183 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
6184 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
6185 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
6186 vcpu->arch.mmu.translate_gpa = translate_gpa;
6187 vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
6188 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
6189 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6190 else
6191 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
6192
6193 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6194 if (!page) {
6195 r = -ENOMEM;
6196 goto fail;
6197 }
6198 vcpu->arch.pio_data = page_address(page);
6199
6200 kvm_init_tsc_catchup(vcpu, max_tsc_khz);
6201
6202 r = kvm_mmu_create(vcpu);
6203 if (r < 0)
6204 goto fail_free_pio_data;
6205
6206 if (irqchip_in_kernel(kvm)) {
6207 r = kvm_create_lapic(vcpu);
6208 if (r < 0)
6209 goto fail_mmu_destroy;
6210 }
6211
6212 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6213 GFP_KERNEL);
6214 if (!vcpu->arch.mce_banks) {
6215 r = -ENOMEM;
6216 goto fail_free_lapic;
6217 }
6218 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
6219
6220 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
6221 goto fail_free_mce_banks;
6222
6223 kvm_async_pf_hash_reset(vcpu);
6224
6225 return 0;
6226fail_free_mce_banks:
6227 kfree(vcpu->arch.mce_banks);
6228fail_free_lapic:
6229 kvm_free_lapic(vcpu);
6230fail_mmu_destroy:
6231 kvm_mmu_destroy(vcpu);
6232fail_free_pio_data:
6233 free_page((unsigned long)vcpu->arch.pio_data);
6234fail:
6235 return r;
6236}
6237
6238void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
6239{
6240 int idx;
6241
6242 kfree(vcpu->arch.mce_banks);
6243 kvm_free_lapic(vcpu);
6244 idx = srcu_read_lock(&vcpu->kvm->srcu);
6245 kvm_mmu_destroy(vcpu);
6246 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6247 free_page((unsigned long)vcpu->arch.pio_data);
6248}
6249
6250int kvm_arch_init_vm(struct kvm *kvm)
6251{
6252 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6253 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
6254
	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
6256 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
6257
6258 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
6259
6260 return 0;
6261}
6262
6263static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
6264{
6265 vcpu_load(vcpu);
6266 kvm_mmu_unload(vcpu);
6267 vcpu_put(vcpu);
6268}
6269
6270static void kvm_free_vcpus(struct kvm *kvm)
6271{
6272 unsigned int i;
6273 struct kvm_vcpu *vcpu;
6274
	/*
	 * Unpin any mmu pages first.
	 */
6278 kvm_for_each_vcpu(i, vcpu, kvm) {
6279 kvm_clear_async_pf_completion_queue(vcpu);
6280 kvm_unload_vcpu_mmu(vcpu);
6281 }
6282 kvm_for_each_vcpu(i, vcpu, kvm)
6283 kvm_arch_vcpu_free(vcpu);
6284
6285 mutex_lock(&kvm->lock);
6286 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
6287 kvm->vcpus[i] = NULL;
6288
6289 atomic_set(&kvm->online_vcpus, 0);
6290 mutex_unlock(&kvm->lock);
6291}
6292
6293void kvm_arch_sync_events(struct kvm *kvm)
6294{
6295 kvm_free_all_assigned_devices(kvm);
6296 kvm_free_pit(kvm);
6297}
6298
6299void kvm_arch_destroy_vm(struct kvm *kvm)
6300{
6301 kvm_iommu_unmap_guest(kvm);
6302 kfree(kvm->arch.vpic);
6303 kfree(kvm->arch.vioapic);
6304 kvm_free_vcpus(kvm);
6305 if (kvm->arch.apic_access_page)
6306 put_page(kvm->arch.apic_access_page);
6307 if (kvm->arch.ept_identity_pagetable)
6308 put_page(kvm->arch.ept_identity_pagetable);
6309}
6310
6311int kvm_arch_prepare_memory_region(struct kvm *kvm,
6312 struct kvm_memory_slot *memslot,
6313 struct kvm_memory_slot old,
6314 struct kvm_userspace_memory_region *mem,
6315 int user_alloc)
6316{
6317 int npages = memslot->npages;
6318 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;
6319
	/* Prevent internal slot pages from being moved by fork()/COW */
6321 if (memslot->id >= KVM_MEMORY_SLOTS)
6322 map_flags = MAP_SHARED | MAP_ANONYMOUS;
6323
	/*
	 * To keep backward compatibility with older userspace, x86 needs
	 * to handle the !user_alloc case here as well.
	 */
6327 if (!user_alloc) {
6328 if (npages && !old.rmap) {
6329 unsigned long userspace_addr;
6330
			down_write(&current->mm->mmap_sem);
6332 userspace_addr = do_mmap(NULL, 0,
6333 npages * PAGE_SIZE,
6334 PROT_READ | PROT_WRITE,
6335 map_flags,
6336 0);
			up_write(&current->mm->mmap_sem);
6338
6339 if (IS_ERR((void *)userspace_addr))
6340 return PTR_ERR((void *)userspace_addr);
6341
6342 memslot->userspace_addr = userspace_addr;
6343 }
6344 }
6345
6346
6347 return 0;
6348}
6349
6350void kvm_arch_commit_memory_region(struct kvm *kvm,
6351 struct kvm_userspace_memory_region *mem,
6352 struct kvm_memory_slot old,
6353 int user_alloc)
6354{
6355
6356 int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6357
6358 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
6359 int ret;
6360
		down_write(&current->mm->mmap_sem);
6362 ret = do_munmap(current->mm, old.userspace_addr,
6363 old.npages * PAGE_SIZE);
		up_write(&current->mm->mmap_sem);
6365 if (ret < 0)
6366 printk(KERN_WARNING
6367 "kvm_vm_ioctl_set_memory_region: "
6368 "failed to munmap memory\n");
6369 }
6370
6371 if (!kvm->arch.n_requested_mmu_pages)
6372 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6373
6374 spin_lock(&kvm->mmu_lock);
6375 if (nr_mmu_pages)
6376 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6377 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6378 spin_unlock(&kvm->mmu_lock);
6379}
6380
6381void kvm_arch_flush_shadow(struct kvm *kvm)
6382{
6383 kvm_mmu_zap_all(kvm);
6384 kvm_reload_remote_mmus(kvm);
6385}
6386
6387int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6388{
6389 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6390 !vcpu->arch.apf.halted)
6391 || !list_empty_careful(&vcpu->async_pf.done)
6392 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6393 || vcpu->arch.nmi_pending ||
6394 (kvm_arch_interrupt_allowed(vcpu) &&
6395 kvm_cpu_has_interrupt(vcpu));
6396}
6397
6398void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
6399{
6400 int me;
6401 int cpu = vcpu->cpu;
6402
6403 if (waitqueue_active(&vcpu->wq)) {
6404 wake_up_interruptible(&vcpu->wq);
6405 ++vcpu->stat.halt_wakeup;
6406 }
6407
6408 me = get_cpu();
6409 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
6410 if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE)
6411 smp_send_reschedule(cpu);
6412 put_cpu();
6413}
6414
6415int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
6416{
6417 return kvm_x86_ops->interrupt_allowed(vcpu);
6418}
6419
6420bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
6421{
6422 unsigned long current_rip = kvm_rip_read(vcpu) +
6423 get_segment_base(vcpu, VCPU_SREG_CS);
6424
6425 return current_rip == linear_rip;
6426}
6427EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
6428
6429unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
6430{
6431 unsigned long rflags;
6432
6433 rflags = kvm_x86_ops->get_rflags(vcpu);
6434 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6435 rflags &= ~X86_EFLAGS_TF;
6436 return rflags;
6437}
6438EXPORT_SYMBOL_GPL(kvm_get_rflags);
6439
6440void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
6441{
6442 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
6443 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
6444 rflags |= X86_EFLAGS_TF;
6445 kvm_x86_ops->set_rflags(vcpu, rflags);
6446 kvm_make_request(KVM_REQ_EVENT, vcpu);
6447}
6448EXPORT_SYMBOL_GPL(kvm_set_rflags);
6449
6450void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
6451{
6452 int r;
6453
6454 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
6455 is_error_page(work->page))
6456 return;
6457
6458 r = kvm_mmu_reload(vcpu);
6459 if (unlikely(r))
6460 return;
6461
6462 if (!vcpu->arch.mmu.direct_map &&
6463 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
6464 return;
6465
6466 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
6467}
6468
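/*
 * vcpu->arch.apf.gfns is a small open-addressed hash table (linear
 * probing, ~0 marks an empty slot) of guest frames that have an async
 * page fault in flight.
 */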
6469static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
6470{
6471 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
6472}
6473
6474static inline u32 kvm_async_pf_next_probe(u32 key)
6475{
6476 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
6477}
6478
6479static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6480{
6481 u32 key = kvm_async_pf_hash_fn(gfn);
6482
6483 while (vcpu->arch.apf.gfns[key] != ~0)
6484 key = kvm_async_pf_next_probe(key);
6485
6486 vcpu->arch.apf.gfns[key] = gfn;
6487}
6488
6489static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
6490{
6491 int i;
6492 u32 key = kvm_async_pf_hash_fn(gfn);
6493
6494 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
6495 (vcpu->arch.apf.gfns[key] != gfn &&
6496 vcpu->arch.apf.gfns[key] != ~0); i++)
6497 key = kvm_async_pf_next_probe(key);
6498
6499 return key;
6500}
6501
6502bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6503{
6504 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
6505}
6506
6507static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6508{
6509 u32 i, j, k;
6510
6511 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
6512 while (true) {
6513 vcpu->arch.apf.gfns[i] = ~0;
6514 do {
6515 j = kvm_async_pf_next_probe(j);
6516 if (vcpu->arch.apf.gfns[j] == ~0)
6517 return;
6518 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k must lie cyclically in (i, j]:
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 * otherwise entry j can be moved into slot i.
			 */
6524 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
6525 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
6526 i = j;
6527 }
6528}
6529
6530static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
6531{
6532
6533 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
6534 sizeof(val));
6535}
6536
6537void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
6538 struct kvm_async_pf *work)
6539{
6540 struct x86_exception fault;
6541
6542 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
6543 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
6544
6545 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
6546 (vcpu->arch.apf.send_user_only &&
6547 kvm_x86_ops->get_cpl(vcpu) == 0))
6548 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
6549 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
6550 fault.vector = PF_VECTOR;
6551 fault.error_code_valid = true;
6552 fault.error_code = 0;
6553 fault.nested_page_fault = false;
6554 fault.address = work->arch.token;
6555 kvm_inject_page_fault(vcpu, &fault);
6556 }
6557}
6558
6559void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
6560 struct kvm_async_pf *work)
6561{
6562 struct x86_exception fault;
6563
6564 trace_kvm_async_pf_ready(work->arch.token, work->gva);
6565 if (is_error_page(work->page))
6566 work->arch.token = ~0;
6567 else
6568 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
6569
6570 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
6571 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
6572 fault.vector = PF_VECTOR;
6573 fault.error_code_valid = true;
6574 fault.error_code = 0;
6575 fault.nested_page_fault = false;
6576 fault.address = work->arch.token;
6577 kvm_inject_page_fault(vcpu, &fault);
6578 }
6579 vcpu->arch.apf.halted = false;
6580}
6581
6582bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
6583{
6584 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
6585 return true;
6586 else
6587 return !kvm_event_needs_reinjection(vcpu) &&
6588 kvm_x86_ops->interrupt_allowed(vcpu);
6589}
6590
6591EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
6592EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
6593EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
6594EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
6595EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
6596EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
6597EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
6598EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
6599EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
6600EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
6601EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
6602EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
6603