/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * x86-specific code: MSR and control register emulation, CPUID handling,
 * clock/TSC management, and the x86 ioctl interface.
 */
22#include <linux/kvm_host.h>
23#include "irq.h"
24#include "mmu.h"
25#include "i8254.h"
26#include "tss.h"
27#include "kvm_cache_regs.h"
28#include "x86.h"
29
30#include <linux/clocksource.h>
31#include <linux/interrupt.h>
32#include <linux/kvm.h>
33#include <linux/fs.h>
34#include <linux/vmalloc.h>
35#include <linux/module.h>
36#include <linux/mman.h>
37#include <linux/highmem.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/cpufreq.h>
41#include <linux/user-return-notifier.h>
42#include <linux/srcu.h>
43#include <linux/slab.h>
44#include <linux/perf_event.h>
45#include <linux/uaccess.h>
46#include <linux/hash.h>
47#include <trace/events/kvm.h>
48
49#define CREATE_TRACE_POINTS
50#include "trace.h"
51
52#include <asm/debugreg.h>
53#include <asm/msr.h>
54#include <asm/desc.h>
55#include <asm/mtrr.h>
56#include <asm/mce.h>
57#include <asm/i387.h>
58#include <asm/xcr.h>
59#include <asm/pvclock.h>
60#include <asm/div64.h>
61
62#define MAX_IO_MSRS 256
63#define CR0_RESERVED_BITS \
64 (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
65 | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
66 | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
67#define CR4_RESERVED_BITS \
68 (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
69 | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
70 | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
71 | X86_CR4_OSXSAVE \
72 | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
73
74#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
75
76#define KVM_MAX_MCE_BANKS 32
77#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
78
/*
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
83#ifdef CONFIG_X86_64
84static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
85#else
86static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
87#endif
88
89#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
90#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
91
92static void update_cr8_intercept(struct kvm_vcpu *vcpu);
93static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
94 struct kvm_cpuid_entry2 __user *entries);
95
96struct kvm_x86_ops *kvm_x86_ops;
97EXPORT_SYMBOL_GPL(kvm_x86_ops);
98
99int ignore_msrs = 0;
100module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
101
102#define KVM_NR_SHARED_MSRS 16
103
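/*
 * "Shared" (user-return) MSRs, e.g. the SYSCALL MSRs: the guest may change
 * them, but the host values only matter once we return to userspace, so
 * they are restored lazily from a user-return notifier rather than on
 * every vmexit.
 */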
104struct kvm_shared_msrs_global {
105 int nr;
106 u32 msrs[KVM_NR_SHARED_MSRS];
107};
108
109struct kvm_shared_msrs {
110 struct user_return_notifier urn;
111 bool registered;
112 struct kvm_shared_msr_values {
113 u64 host;
114 u64 curr;
115 } values[KVM_NR_SHARED_MSRS];
116};
117
118static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
119static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
120
121struct kvm_stats_debugfs_item debugfs_entries[] = {
122 { "pf_fixed", VCPU_STAT(pf_fixed) },
123 { "pf_guest", VCPU_STAT(pf_guest) },
124 { "tlb_flush", VCPU_STAT(tlb_flush) },
125 { "invlpg", VCPU_STAT(invlpg) },
126 { "exits", VCPU_STAT(exits) },
127 { "io_exits", VCPU_STAT(io_exits) },
128 { "mmio_exits", VCPU_STAT(mmio_exits) },
129 { "signal_exits", VCPU_STAT(signal_exits) },
130 { "irq_window", VCPU_STAT(irq_window_exits) },
131 { "nmi_window", VCPU_STAT(nmi_window_exits) },
132 { "halt_exits", VCPU_STAT(halt_exits) },
133 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
134 { "hypercalls", VCPU_STAT(hypercalls) },
135 { "request_irq", VCPU_STAT(request_irq_exits) },
136 { "irq_exits", VCPU_STAT(irq_exits) },
137 { "host_state_reload", VCPU_STAT(host_state_reload) },
138 { "efer_reload", VCPU_STAT(efer_reload) },
139 { "fpu_reload", VCPU_STAT(fpu_reload) },
140 { "insn_emulation", VCPU_STAT(insn_emulation) },
141 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
142 { "irq_injections", VCPU_STAT(irq_injections) },
143 { "nmi_injections", VCPU_STAT(nmi_injections) },
144 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
145 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
146 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
147 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
148 { "mmu_flooded", VM_STAT(mmu_flooded) },
149 { "mmu_recycled", VM_STAT(mmu_recycled) },
150 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
151 { "mmu_unsync", VM_STAT(mmu_unsync) },
152 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
153 { "largepages", VM_STAT(lpages) },
154 { NULL }
155};
156
157u64 __read_mostly host_xcr0;
158
159static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
160{
161 int i;
162 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
163 vcpu->arch.apf.gfns[i] = ~0;
164}
165
166static void kvm_on_user_return(struct user_return_notifier *urn)
167{
168 unsigned slot;
169 struct kvm_shared_msrs *locals
170 = container_of(urn, struct kvm_shared_msrs, urn);
171 struct kvm_shared_msr_values *values;
172
173 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
174 values = &locals->values[slot];
175 if (values->host != values->curr) {
176 wrmsrl(shared_msrs_global.msrs[slot], values->host);
177 values->curr = values->host;
178 }
179 }
180 locals->registered = false;
181 user_return_notifier_unregister(urn);
182}
183
184static void shared_msr_update(unsigned slot, u32 msr)
185{
186 struct kvm_shared_msrs *smsr;
187 u64 value;
188
189 smsr = &__get_cpu_var(shared_msrs);
	/* only read here; nobody should be modifying it at this time,
	 * so no lock is needed */
192 if (slot >= shared_msrs_global.nr) {
193 printk(KERN_ERR "kvm: invalid MSR slot!");
194 return;
195 }
196 rdmsrl_safe(msr, &value);
197 smsr->values[slot].host = value;
198 smsr->values[slot].curr = value;
199}
200
201void kvm_define_shared_msr(unsigned slot, u32 msr)
202{
203 if (slot >= shared_msrs_global.nr)
204 shared_msrs_global.nr = slot + 1;
205 shared_msrs_global.msrs[slot] = msr;
206
207 smp_wmb();
208}
209EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
210
211static void kvm_shared_msr_cpu_online(void)
212{
213 unsigned i;
214
215 for (i = 0; i < shared_msrs_global.nr; ++i)
216 shared_msr_update(i, shared_msrs_global.msrs[i]);
217}
218
219void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
220{
221 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
222
223 if (((value ^ smsr->values[slot].curr) & mask) == 0)
224 return;
225 smsr->values[slot].curr = value;
226 wrmsrl(shared_msrs_global.msrs[slot], value);
227 if (!smsr->registered) {
228 smsr->urn.on_user_return = kvm_on_user_return;
229 user_return_notifier_register(&smsr->urn);
230 smsr->registered = true;
231 }
232}
233EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
234
235static void drop_user_return_notifiers(void *ignore)
236{
237 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
238
239 if (smsr->registered)
240 kvm_on_user_return(&smsr->urn);
241}
242
243u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
244{
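	/*
	 * The APIC base is cached in vcpu->arch.apic_base regardless of
	 * whether the local APIC is emulated in the kernel or in userspace,
	 * so both branches return the same field.
	 */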
245 if (irqchip_in_kernel(vcpu->kvm))
246 return vcpu->arch.apic_base;
247 else
248 return vcpu->arch.apic_base;
249}
250EXPORT_SYMBOL_GPL(kvm_get_apic_base);
251
252void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
253{
254
255 if (irqchip_in_kernel(vcpu->kvm))
256 kvm_lapic_set_base(vcpu, data);
257 else
258 vcpu->arch.apic_base = data;
259}
260EXPORT_SYMBOL_GPL(kvm_set_apic_base);
261
262#define EXCPT_BENIGN 0
263#define EXCPT_CONTRIBUTORY 1
264#define EXCPT_PF 2
265
266static int exception_class(int vector)
267{
268 switch (vector) {
269 case PF_VECTOR:
270 return EXCPT_PF;
271 case DE_VECTOR:
272 case TS_VECTOR:
273 case NP_VECTOR:
274 case SS_VECTOR:
275 case GP_VECTOR:
276 return EXCPT_CONTRIBUTORY;
277 default:
278 break;
279 }
280 return EXCPT_BENIGN;
281}
282
283static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
284 unsigned nr, bool has_error, u32 error_code,
285 bool reinject)
286{
287 u32 prev_nr;
288 int class1, class2;
289
290 kvm_make_request(KVM_REQ_EVENT, vcpu);
291
292 if (!vcpu->arch.exception.pending) {
293 queue:
294 vcpu->arch.exception.pending = true;
295 vcpu->arch.exception.has_error_code = has_error;
296 vcpu->arch.exception.nr = nr;
297 vcpu->arch.exception.error_code = error_code;
298 vcpu->arch.exception.reinject = reinject;
299 return;
300 }
301
302
303 prev_nr = vcpu->arch.exception.nr;
304 if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
306 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
307 return;
308 }
309 class1 = exception_class(prev_nr);
310 class2 = exception_class(nr);
311 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
312 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* generate double fault per SDM Table 5-5 */
314 vcpu->arch.exception.pending = true;
315 vcpu->arch.exception.has_error_code = true;
316 vcpu->arch.exception.nr = DF_VECTOR;
317 vcpu->arch.exception.error_code = 0;
318 } else
		/*
		 * Replace the previous exception with the new one, in the
		 * hope that re-executing the instruction will regenerate
		 * the lost exception.
		 */
322 goto queue;
323}
324
325void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
326{
327 kvm_multiple_exception(vcpu, nr, false, 0, false);
328}
329EXPORT_SYMBOL_GPL(kvm_queue_exception);
330
331void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
332{
333 kvm_multiple_exception(vcpu, nr, false, 0, true);
334}
335EXPORT_SYMBOL_GPL(kvm_requeue_exception);
336
337void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
338{
339 if (err)
340 kvm_inject_gp(vcpu, 0);
341 else
342 kvm_x86_ops->skip_emulated_instruction(vcpu);
343}
344EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
345
346void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
347{
348 ++vcpu->stat.pf_guest;
349 vcpu->arch.cr2 = fault->address;
350 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
351}
352
353void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
354{
355 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
356 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
357 else
358 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
359}
360
361void kvm_inject_nmi(struct kvm_vcpu *vcpu)
362{
363 kvm_make_request(KVM_REQ_EVENT, vcpu);
364 vcpu->arch.nmi_pending = 1;
365}
366EXPORT_SYMBOL_GPL(kvm_inject_nmi);
367
368void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
369{
370 kvm_multiple_exception(vcpu, nr, true, error_code, false);
371}
372EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
373
374void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
375{
376 kvm_multiple_exception(vcpu, nr, true, error_code, true);
377}
378EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
379
/*
 * Check whether the current privilege level is at most required_cpl; if it
 * is, return true.  Otherwise queue a #GP(0) and return false.
 */
384bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
385{
386 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
387 return true;
388 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
389 return false;
390}
391EXPORT_SYMBOL_GPL(kvm_require_cpl);
392
/*
 * Read from the physical memory of the currently running guest through the
 * given MMU context.  Unlike kvm_read_guest_page(), this can read either
 * guest-physical memory or, when nested, the guest's guest-physical memory.
 */
398int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
399 gfn_t ngfn, void *data, int offset, int len,
400 u32 access)
401{
402 gfn_t real_gfn;
403 gpa_t ngpa;
404
405 ngpa = gfn_to_gpa(ngfn);
406 real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
407 if (real_gfn == UNMAPPED_GVA)
408 return -EFAULT;
409
410 real_gfn = gpa_to_gfn(real_gfn);
411
412 return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
413}
414EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
415
416int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
417 void *data, int offset, int len, u32 access)
418{
419 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
420 data, offset, len, access);
421}
422
/*
 * Load the PAE PDPTEs referenced by cr3.  Returns 1 if they are all valid,
 * 0 otherwise.
 */
426int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
427{
428 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
429 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
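	/*
	 * In PAE mode cr3 bits 11:5 locate the 32-byte aligned PDPT within
	 * its page; ">> 5 << 2" turns that into an offset in u64 entries,
	 * which kvm_read_guest_page_mmu() below scales by sizeof(u64).
	 */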
430 int i;
431 int ret;
432 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
433
434 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
435 offset * sizeof(u64), sizeof(pdpte),
436 PFERR_USER_MASK|PFERR_WRITE_MASK);
437 if (ret < 0) {
438 ret = 0;
439 goto out;
440 }
441 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
442 if (is_present_gpte(pdpte[i]) &&
443 (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
444 ret = 0;
445 goto out;
446 }
447 }
448 ret = 1;
449
450 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
451 __set_bit(VCPU_EXREG_PDPTR,
452 (unsigned long *)&vcpu->arch.regs_avail);
453 __set_bit(VCPU_EXREG_PDPTR,
454 (unsigned long *)&vcpu->arch.regs_dirty);
455out:
456
457 return ret;
458}
459EXPORT_SYMBOL_GPL(load_pdptrs);
460
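/*
 * Return true if the PDPTEs cached for the vCPU may no longer match what is
 * currently in guest memory (or if they are not cached at all).
 */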
461static bool pdptrs_changed(struct kvm_vcpu *vcpu)
462{
463 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
464 bool changed = true;
465 int offset;
466 gfn_t gfn;
467 int r;
468
469 if (is_long_mode(vcpu) || !is_pae(vcpu))
470 return false;
471
472 if (!test_bit(VCPU_EXREG_PDPTR,
473 (unsigned long *)&vcpu->arch.regs_avail))
474 return true;
475
476 gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
477 offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
478 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
479 PFERR_USER_MASK | PFERR_WRITE_MASK);
480 if (r < 0)
481 goto out;
482 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
483out:
484
485 return changed;
486}
487
488int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
489{
490 unsigned long old_cr0 = kvm_read_cr0(vcpu);
491 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
492 X86_CR0_CD | X86_CR0_NW;
493
494 cr0 |= X86_CR0_ET;
495
496#ifdef CONFIG_X86_64
497 if (cr0 & 0xffffffff00000000UL)
498 return 1;
499#endif
500
501 cr0 &= ~CR0_RESERVED_BITS;
502
503 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
504 return 1;
505
506 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
507 return 1;
508
509 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
510#ifdef CONFIG_X86_64
511 if ((vcpu->arch.efer & EFER_LME)) {
512 int cs_db, cs_l;
513
514 if (!is_pae(vcpu))
515 return 1;
516 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
517 if (cs_l)
518 return 1;
519 } else
520#endif
521 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
522 kvm_read_cr3(vcpu)))
523 return 1;
524 }
525
526 kvm_x86_ops->set_cr0(vcpu, cr0);
527
528 if ((cr0 ^ old_cr0) & X86_CR0_PG)
529 kvm_clear_async_pf_completion_queue(vcpu);
530
531 if ((cr0 ^ old_cr0) & update_bits)
532 kvm_mmu_reset_context(vcpu);
533 return 0;
534}
535EXPORT_SYMBOL_GPL(kvm_set_cr0);
536
537void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
538{
539 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
540}
541EXPORT_SYMBOL_GPL(kvm_lmsw);
542
543int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
544{
545 u64 xcr0;
546
	/* Only XCR0, i.e. XCR_XFEATURE_ENABLED_MASK, is supported for now. */
548 if (index != XCR_XFEATURE_ENABLED_MASK)
549 return 1;
550 xcr0 = xcr;
551 if (kvm_x86_ops->get_cpl(vcpu) != 0)
552 return 1;
553 if (!(xcr0 & XSTATE_FP))
554 return 1;
555 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
556 return 1;
557 if (xcr0 & ~host_xcr0)
558 return 1;
559 vcpu->arch.xcr0 = xcr0;
560 vcpu->guest_xcr0_loaded = 0;
561 return 0;
562}
563
564int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
565{
566 if (__kvm_set_xcr(vcpu, index, xcr)) {
567 kvm_inject_gp(vcpu, 0);
568 return 1;
569 }
570 return 0;
571}
572EXPORT_SYMBOL_GPL(kvm_set_xcr);
573
574static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
575{
576 struct kvm_cpuid_entry2 *best;
577
578 best = kvm_find_cpuid_entry(vcpu, 1, 0);
579 return best && (best->ecx & bit(X86_FEATURE_XSAVE));
580}
581
582static void update_cpuid(struct kvm_vcpu *vcpu)
583{
584 struct kvm_cpuid_entry2 *best;
585
586 best = kvm_find_cpuid_entry(vcpu, 1, 0);
587 if (!best)
588 return;
589
	/* Update the OSXSAVE bit to track CR4.OSXSAVE */
591 if (cpu_has_xsave && best->function == 0x1) {
592 best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
593 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
594 best->ecx |= bit(X86_FEATURE_OSXSAVE);
595 }
596}
597
598int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
599{
600 unsigned long old_cr4 = kvm_read_cr4(vcpu);
601 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
602
603 if (cr4 & CR4_RESERVED_BITS)
604 return 1;
605
606 if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
607 return 1;
608
609 if (is_long_mode(vcpu)) {
610 if (!(cr4 & X86_CR4_PAE))
611 return 1;
612 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
613 && ((cr4 ^ old_cr4) & pdptr_bits)
614 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
615 kvm_read_cr3(vcpu)))
616 return 1;
617
618 if (cr4 & X86_CR4_VMXE)
619 return 1;
620
621 kvm_x86_ops->set_cr4(vcpu, cr4);
622
623 if ((cr4 ^ old_cr4) & pdptr_bits)
624 kvm_mmu_reset_context(vcpu);
625
626 if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
627 update_cpuid(vcpu);
628
629 return 0;
630}
631EXPORT_SYMBOL_GPL(kvm_set_cr4);
632
633int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
634{
635 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
636 kvm_mmu_sync_roots(vcpu);
637 kvm_mmu_flush_tlb(vcpu);
638 return 0;
639 }
640
641 if (is_long_mode(vcpu)) {
642 if (cr3 & CR3_L_MODE_RESERVED_BITS)
643 return 1;
644 } else {
645 if (is_pae(vcpu)) {
646 if (cr3 & CR3_PAE_RESERVED_BITS)
647 return 1;
648 if (is_paging(vcpu) &&
649 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
650 return 1;
651 }
		/*
		 * Reserved bits are not checked in non-PAE mode because this
		 * is not enforced by real hardware and VMware depends on
		 * that behaviour.
		 */
656 }

	/*
	 * Does the new cr3 value map to physical memory? (Note, we catch an
	 * invalid cr3 even in real mode, because it would cause trouble later
	 * on when we turn on paging anyway.)
	 *
	 * A real CPU would silently accept an invalid cr3 and would attempt
	 * to use it - with largely undefined (and often hard to debug)
	 * behavior on the guest side.
	 */
667 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
668 return 1;
669 vcpu->arch.cr3 = cr3;
670 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
671 vcpu->arch.mmu.new_cr3(vcpu);
672 return 0;
673}
674EXPORT_SYMBOL_GPL(kvm_set_cr3);
675
676int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
677{
678 if (cr8 & CR8_RESERVED_BITS)
679 return 1;
680 if (irqchip_in_kernel(vcpu->kvm))
681 kvm_lapic_set_tpr(vcpu, cr8);
682 else
683 vcpu->arch.cr8 = cr8;
684 return 0;
685}
686EXPORT_SYMBOL_GPL(kvm_set_cr8);
687
688unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
689{
690 if (irqchip_in_kernel(vcpu->kvm))
691 return kvm_lapic_get_cr8(vcpu);
692 else
693 return vcpu->arch.cr8;
694}
695EXPORT_SYMBOL_GPL(kvm_get_cr8);
696
697static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
698{
699 switch (dr) {
700 case 0 ... 3:
701 vcpu->arch.db[dr] = val;
702 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
703 vcpu->arch.eff_db[dr] = val;
704 break;
705 case 4:
706 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
707 return 1;
		/* fall through */
709 case 6:
710 if (val & 0xffffffff00000000ULL)
711 return -1;
712 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
713 break;
714 case 5:
715 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
716 return 1;
		/* fall through */
718 default:
719 if (val & 0xffffffff00000000ULL)
720 return -1;
721 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
722 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
723 kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
724 vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
725 }
726 break;
727 }
728
729 return 0;
730}
731
732int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
733{
734 int res;
735
736 res = __kvm_set_dr(vcpu, dr, val);
737 if (res > 0)
738 kvm_queue_exception(vcpu, UD_VECTOR);
739 else if (res < 0)
740 kvm_inject_gp(vcpu, 0);
741
742 return res;
743}
744EXPORT_SYMBOL_GPL(kvm_set_dr);
745
746static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
747{
748 switch (dr) {
749 case 0 ... 3:
750 *val = vcpu->arch.db[dr];
751 break;
752 case 4:
753 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
754 return 1;
		/* fall through */
756 case 6:
757 *val = vcpu->arch.dr6;
758 break;
759 case 5:
760 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
761 return 1;
		/* fall through */
763 default:
764 *val = vcpu->arch.dr7;
765 break;
766 }
767
768 return 0;
769}
770
771int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
772{
773 if (_kvm_get_dr(vcpu, dr, val)) {
774 kvm_queue_exception(vcpu, UD_VECTOR);
775 return 1;
776 }
777 return 0;
778}
779EXPORT_SYMBOL_GPL(kvm_get_dr);
780

/*
 * List of MSRs that are exposed to userspace through KVM_GET_MSRS,
 * KVM_SET_MSRS and KVM_GET_MSR_INDEX_LIST.
 *
 * The list is modified at module load time to reflect the capabilities of
 * the host CPU.  The capability test is skipped for the kvm-specific MSRs,
 * which is why those are placed at the beginning of the list.
 */
790#define KVM_SAVE_MSRS_BEGIN 8
791static u32 msrs_to_save[] = {
792 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
793 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
794 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
795 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN,
796 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
797 MSR_STAR,
798#ifdef CONFIG_X86_64
799 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
800#endif
801 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
802};
803
804static unsigned num_msrs_to_save;
805
806static u32 emulated_msrs[] = {
807 MSR_IA32_MISC_ENABLE,
808 MSR_IA32_MCG_STATUS,
809 MSR_IA32_MCG_CTL,
810};
811
812static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
813{
814 u64 old_efer = vcpu->arch.efer;
815
816 if (efer & efer_reserved_bits)
817 return 1;
818
819 if (is_paging(vcpu)
820 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
821 return 1;
822
823 if (efer & EFER_FFXSR) {
824 struct kvm_cpuid_entry2 *feat;
825
826 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
827 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
828 return 1;
829 }
830
831 if (efer & EFER_SVME) {
832 struct kvm_cpuid_entry2 *feat;
833
834 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
835 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
836 return 1;
837 }
838
839 efer &= ~EFER_LMA;
840 efer |= vcpu->arch.efer & EFER_LMA;
841
842 kvm_x86_ops->set_efer(vcpu, efer);
843
844 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
845
	/* A change in EFER.NX affects the MMU's reserved-bit checks */
847 if ((efer ^ old_efer) & EFER_NX)
848 kvm_mmu_reset_context(vcpu);
849
850 return 0;
851}
852
853void kvm_enable_efer_bits(u64 mask)
854{
855 efer_reserved_bits &= ~mask;
856}
857EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
858
/*
 * Write an MSR value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
865int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
866{
867 return kvm_x86_ops->set_msr(vcpu, msr_index, data);
868}
869
/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
873static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
874{
875 return kvm_set_msr(vcpu, index, *data);
876}
877
878static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
879{
880 int version;
881 int r;
882 struct pvclock_wall_clock wc;
883 struct timespec boot;
884
885 if (!wall_clock)
886 return;
887
888 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
889 if (r)
890 return;
891
892 if (version & 1)
893 ++version;
894
895 ++version;
896
897 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
898

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  Guest system time equals host
	 * time for us, thus we must fill in host boot time here.
	 */
905 getboottime(&boot);
906
907 wc.sec = boot.tv_sec;
908 wc.nsec = boot.tv_nsec;
909 wc.version = version;
910
911 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
912
913 version++;
914 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
915}
916
917static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
918{
919 uint32_t quotient, remainder;
920
	/*
	 * Don't try to replace this with do_div(): it computes
	 * (dividend << 32) / divisor.
	 */
923 __asm__ ( "divl %4"
924 : "=a" (quotient), "=d" (remainder)
925 : "0" (0), "1" (dividend), "r" (divisor) );
926 return quotient;
927}
928
929static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
930 s8 *pshift, u32 *pmultiplier)
931{
932 uint64_t scaled64;
933 int32_t shift = 0;
934 uint64_t tps64;
935 uint32_t tps32;
936
937 tps64 = base_khz * 1000LL;
938 scaled64 = scaled_khz * 1000LL;
939 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
940 tps64 >>= 1;
941 shift--;
942 }
943
944 tps32 = (uint32_t)tps64;
945 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
946 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
947 scaled64 >>= 1;
948 else
949 tps32 <<= 1;
950 shift++;
951 }
952
953 *pshift = shift;
954 *pmultiplier = div_frac(scaled64, tps32);
955
956 pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
957 __func__, base_khz, scaled_khz, shift, *pmultiplier);
958}
959
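/*
 * Nanoseconds since boot on the boot-based monotonic clock, i.e. including
 * time spent in suspend.
 */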
960static inline u64 get_kernel_ns(void)
961{
962 struct timespec ts;
963
964 WARN_ON(preemptible());
965 ktime_get_ts(&ts);
966 monotonic_to_bootbased(&ts);
967 return timespec_to_ns(&ts);
968}
969
970static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
971unsigned long max_tsc_khz;
972
973static inline int kvm_tsc_changes_freq(void)
974{
975 int cpu = get_cpu();
976 int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
977 cpufreq_quick_get(cpu) != 0;
978 put_cpu();
979 return ret;
980}
981
982static inline u64 nsec_to_cycles(u64 nsec)
983{
984 u64 ret;
985
986 WARN_ON(preemptible());
987 if (kvm_tsc_changes_freq())
988 printk_once(KERN_WARNING
989 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
990 ret = nsec * __this_cpu_read(cpu_tsc_khz);
991 do_div(ret, USEC_PER_SEC);
992 return ret;
993}
994
995static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
996{
	/* Compute a scale to convert nanoseconds into TSC cycles */
998 kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
999 &kvm->arch.virtual_tsc_shift,
1000 &kvm->arch.virtual_tsc_mult);
1001 kvm->arch.virtual_tsc_khz = this_tsc_khz;
1002}
1003
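/*
 * Reconstruct the guest TSC at 'kernel_ns': the last guest-written TSC value
 * plus the nanoseconds elapsed since that write, scaled to the VM's virtual
 * TSC frequency.
 */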
1004static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1005{
1006 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
1007 vcpu->kvm->arch.virtual_tsc_mult,
1008 vcpu->kvm->arch.virtual_tsc_shift);
1009 tsc += vcpu->arch.last_tsc_write;
1010 return tsc;
1011}
1012
1013void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
1014{
1015 struct kvm *kvm = vcpu->kvm;
1016 u64 offset, ns, elapsed;
1017 unsigned long flags;
1018 s64 sdiff;
1019
1020 spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1021 offset = data - native_read_tsc();
1022 ns = get_kernel_ns();
1023 elapsed = ns - kvm->arch.last_tsc_nsec;
1024 sdiff = data - kvm->arch.last_tsc_write;
1025 if (sdiff < 0)
1026 sdiff = -sdiff;
1027

	/*
	 * Special case: a TSC write that lands close to the last write on
	 * another CPU (in both cycles and elapsed time) is interpreted as an
	 * attempt to synchronize.  The 5 second window accommodates host
	 * load/swapping as well as any reset of the TSC during guest boot.
	 *
	 * In that case, for a reliable TSC, we can match TSC offsets, or
	 * make a best guess using the elapsed time.
	 */
1037 if (sdiff < nsec_to_cycles(5ULL * NSEC_PER_SEC) &&
1038 elapsed < 5ULL * NSEC_PER_SEC) {
1039 if (!check_tsc_unstable()) {
1040 offset = kvm->arch.last_tsc_offset;
1041 pr_debug("kvm: matched tsc offset for %llu\n", data);
1042 } else {
1043 u64 delta = nsec_to_cycles(elapsed);
1044 offset += delta;
1045 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1046 }
1047 ns = kvm->arch.last_tsc_nsec;
1048 }
1049 kvm->arch.last_tsc_nsec = ns;
1050 kvm->arch.last_tsc_write = data;
1051 kvm->arch.last_tsc_offset = offset;
1052 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1053 spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1054

	/* Reset of TSC must disable overshoot protection below */
1056 vcpu->arch.hv_clock.tsc_timestamp = 0;
1057 vcpu->arch.last_tsc_write = data;
1058 vcpu->arch.last_tsc_nsec = ns;
1059}
1060EXPORT_SYMBOL_GPL(kvm_write_tsc);
1061
1062static int kvm_guest_time_update(struct kvm_vcpu *v)
1063{
1064 unsigned long flags;
1065 struct kvm_vcpu_arch *vcpu = &v->arch;
1066 void *shared_kaddr;
1067 unsigned long this_tsc_khz;
1068 s64 kernel_ns, max_kernel_ns;
1069 u64 tsc_timestamp;
1070

	/* Keep irq disabled to prevent changes to the clock */
1072 local_irq_save(flags);
1073 kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
1074 kernel_ns = get_kernel_ns();
1075 this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
1076
1077 if (unlikely(this_tsc_khz == 0)) {
1078 local_irq_restore(flags);
1079 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1080 return 1;
1081 }
1082

	/*
	 * We may have to catch up the TSC to match elapsed wall clock
	 * time for two reasons, even if kvmclock is used.
	 *   1) CPU could have been running below the maximum TSC rate
	 *   2) Broken TSC compensation resets the base at each VCPU
	 *      entry to avoid unknown leaps of TSC even when running
	 *      again on the same CPU.  This may cause apparent elapsed
	 *      time to disappear, and the guest to stand still or run
	 *      very slowly.
	 */
1093 if (vcpu->tsc_catchup) {
1094 u64 tsc = compute_guest_tsc(v, kernel_ns);
1095 if (tsc > tsc_timestamp) {
1096 kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
1097 tsc_timestamp = tsc;
1098 }
1099 }
1100
1101 local_irq_restore(flags);
1102
1103 if (!vcpu->time_page)
1104 return 0;
1105

	/*
	 * Time as measured by the TSC may go backwards when resetting the base
	 * tsc_timestamp.  The reason for this is that the TSC resolution is
	 * higher than the resolution of the other clock scales.  Thus, many
	 * possible measurements of the TSC correspond to one measurement of
	 * any other clock, and so a spread of values is possible.  This is not
	 * a problem for the computation of the nanosecond clock; with TSC
	 * rates around 1GHz, there can only be a few cycles which correspond
	 * to one nanosecond value, and any path through this code will
	 * inevitably take longer than that.  However, with the kernel_ns value
	 * itself, the precision may be much lower, down to HZ granularity.  If
	 * the first sampling of TSC against kernel_ns ends in the low part of
	 * the range, and the second in the high end of the range, we can get:
	 *
	 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
	 *
	 * As the sampling errors potentially range in the thousands of cycles,
	 * it is possible such a time value has already been observed by the
	 * guest.  To protect against this, we must compute the system time as
	 * observed by the guest and ensure the new system time is greater.
	 */
1127 max_kernel_ns = 0;
1128 if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
1129 max_kernel_ns = vcpu->last_guest_tsc -
1130 vcpu->hv_clock.tsc_timestamp;
1131 max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
1132 vcpu->hv_clock.tsc_to_system_mul,
1133 vcpu->hv_clock.tsc_shift);
1134 max_kernel_ns += vcpu->last_kernel_ns;
1135 }
1136
1137 if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
1138 kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
1139 &vcpu->hv_clock.tsc_shift,
1140 &vcpu->hv_clock.tsc_to_system_mul);
1141 vcpu->hw_tsc_khz = this_tsc_khz;
1142 }
1143
1144 if (max_kernel_ns > kernel_ns)
1145 kernel_ns = max_kernel_ns;
1146

	/* With all the info we got, fill in the values */
1148 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1149 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1150 vcpu->last_kernel_ns = kernel_ns;
1151 vcpu->last_guest_tsc = tsc_timestamp;
1152 vcpu->hv_clock.flags = 0;
1153

	/*
	 * The interface expects us to write an even number signaling that the
	 * update is finished.  Since the guest won't see the intermediate
	 * state, we just increase by 2 at the end.
	 */
1159 vcpu->hv_clock.version += 2;
1160
1161 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
1162
1163 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
1164 sizeof(vcpu->hv_clock));
1165
1166 kunmap_atomic(shared_kaddr, KM_USER0);
1167
1168 mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
1169 return 0;
1170}
1171
1172static bool msr_mtrr_valid(unsigned msr)
1173{
1174 switch (msr) {
1175 case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
1176 case MSR_MTRRfix64K_00000:
1177 case MSR_MTRRfix16K_80000:
1178 case MSR_MTRRfix16K_A0000:
1179 case MSR_MTRRfix4K_C0000:
1180 case MSR_MTRRfix4K_C8000:
1181 case MSR_MTRRfix4K_D0000:
1182 case MSR_MTRRfix4K_D8000:
1183 case MSR_MTRRfix4K_E0000:
1184 case MSR_MTRRfix4K_E8000:
1185 case MSR_MTRRfix4K_F0000:
1186 case MSR_MTRRfix4K_F8000:
1187 case MSR_MTRRdefType:
1188 case MSR_IA32_CR_PAT:
1189 return true;
1190 case 0x2f8:
1191 return true;
1192 }
1193 return false;
1194}
1195
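/* PAT memory types 0, 1, 4, 5, 6 and 7 are defined; types 2 and 3 are reserved. */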
1196static bool valid_pat_type(unsigned t)
1197{
1198 return t < 8 && (1 << t) & 0xf3;
1199}
1200
1201static bool valid_mtrr_type(unsigned t)
1202{
1203 return t < 8 && (1 << t) & 0x73;
1204}
1205
1206static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1207{
1208 int i;
1209
1210 if (!msr_mtrr_valid(msr))
1211 return false;
1212
1213 if (msr == MSR_IA32_CR_PAT) {
1214 for (i = 0; i < 8; i++)
1215 if (!valid_pat_type((data >> (i * 8)) & 0xff))
1216 return false;
1217 return true;
1218 } else if (msr == MSR_MTRRdefType) {
1219 if (data & ~0xcff)
1220 return false;
1221 return valid_mtrr_type(data & 0xff);
1222 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
1223 for (i = 0; i < 8 ; i++)
1224 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
1225 return false;
1226 return true;
1227 }

	/* variable-range MTRRs */
1230 return valid_mtrr_type(data & 0xff);
1231}
1232
1233static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1234{
1235 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1236
1237 if (!mtrr_valid(vcpu, msr, data))
1238 return 1;
1239
1240 if (msr == MSR_MTRRdefType) {
1241 vcpu->arch.mtrr_state.def_type = data;
1242 vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
1243 } else if (msr == MSR_MTRRfix64K_00000)
1244 p[0] = data;
1245 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1246 p[1 + msr - MSR_MTRRfix16K_80000] = data;
1247 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1248 p[3 + msr - MSR_MTRRfix4K_C0000] = data;
1249 else if (msr == MSR_IA32_CR_PAT)
1250 vcpu->arch.pat = data;
1251 else {
1252 int idx, is_mtrr_mask;
1253 u64 *pt;
1254
1255 idx = (msr - 0x200) / 2;
1256 is_mtrr_mask = msr - 0x200 - 2 * idx;
1257 if (!is_mtrr_mask)
1258 pt =
1259 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1260 else
1261 pt =
1262 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1263 *pt = data;
1264 }
1265
1266 kvm_mmu_reset_context(vcpu);
1267 return 0;
1268}
1269
1270static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1271{
1272 u64 mcg_cap = vcpu->arch.mcg_cap;
1273 unsigned bank_num = mcg_cap & 0xff;
1274
1275 switch (msr) {
1276 case MSR_IA32_MCG_STATUS:
1277 vcpu->arch.mcg_status = data;
1278 break;
1279 case MSR_IA32_MCG_CTL:
1280 if (!(mcg_cap & MCG_CTL_P))
1281 return 1;
1282 if (data != 0 && data != ~(u64)0)
1283 return -1;
1284 vcpu->arch.mcg_ctl = data;
1285 break;
1286 default:
1287 if (msr >= MSR_IA32_MC0_CTL &&
1288 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1289 u32 offset = msr - MSR_IA32_MC0_CTL;
			/*
			 * Only 0 or all 1s can be written to IA32_MCi_CTL.
			 * Some Linux kernels clear bit 10 in bank 4 to work
			 * around a BIOS/GCC 4.3.0 bug, so tolerate that to
			 * avoid an uncaught #GP in the guest.
			 */
1295 if ((offset & 0x3) == 0 &&
1296 data != 0 && (data | (1 << 10)) != ~(u64)0)
1297 return -1;
1298 vcpu->arch.mce_banks[offset] = data;
1299 break;
1300 }
1301 return 1;
1302 }
1303 return 0;
1304}
1305
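/*
 * Handle a write to the Xen-HVM config MSR: copy the requested page of the
 * hypercall blob that userspace registered into guest memory at the given
 * address.
 */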
1306static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
1307{
1308 struct kvm *kvm = vcpu->kvm;
1309 int lm = is_long_mode(vcpu);
1310 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
1311 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
1312 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
1313 : kvm->arch.xen_hvm_config.blob_size_32;
1314 u32 page_num = data & ~PAGE_MASK;
1315 u64 page_addr = data & PAGE_MASK;
1316 u8 *page;
1317 int r;
1318
1319 r = -E2BIG;
1320 if (page_num >= blob_size)
1321 goto out;
1322 r = -ENOMEM;
1323 page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1324 if (!page)
1325 goto out;
1326 r = -EFAULT;
1327 if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE))
1328 goto out_free;
1329 if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
1330 goto out_free;
1331 r = 0;
1332out_free:
1333 kfree(page);
1334out:
1335 return r;
1336}
1337
1338static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1339{
1340 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1341}
1342
1343static bool kvm_hv_msr_partition_wide(u32 msr)
1344{
1345 bool r = false;
1346 switch (msr) {
1347 case HV_X64_MSR_GUEST_OS_ID:
1348 case HV_X64_MSR_HYPERCALL:
1349 r = true;
1350 break;
1351 }
1352
1353 return r;
1354}
1355
1356static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1357{
1358 struct kvm *kvm = vcpu->kvm;
1359
1360 switch (msr) {
1361 case HV_X64_MSR_GUEST_OS_ID:
1362 kvm->arch.hv_guest_os_id = data;
		/* setting the guest os id to zero disables the hypercall page */
1364 if (!kvm->arch.hv_guest_os_id)
1365 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1366 break;
1367 case HV_X64_MSR_HYPERCALL: {
1368 u64 gfn;
1369 unsigned long addr;
1370 u8 instructions[4];

		/* if guest os id is not set, the hypercall page remains disabled */
1373 if (!kvm->arch.hv_guest_os_id)
1374 break;
1375 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1376 kvm->arch.hv_hypercall = data;
1377 break;
1378 }
1379 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1380 addr = gfn_to_hva(kvm, gfn);
1381 if (kvm_is_error_hva(addr))
1382 return 1;
1383 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1384 ((unsigned char *)instructions)[3] = 0xc3;
1385 if (copy_to_user((void __user *)addr, instructions, 4))
1386 return 1;
1387 kvm->arch.hv_hypercall = data;
1388 break;
1389 }
1390 default:
1391 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1392 "data 0x%llx\n", msr, data);
1393 return 1;
1394 }
1395 return 0;
1396}
1397
1398static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1399{
1400 switch (msr) {
1401 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1402 unsigned long addr;
1403
1404 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1405 vcpu->arch.hv_vapic = data;
1406 break;
1407 }
1408 addr = gfn_to_hva(vcpu->kvm, data >>
1409 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1410 if (kvm_is_error_hva(addr))
1411 return 1;
1412 if (clear_user((void __user *)addr, PAGE_SIZE))
1413 return 1;
1414 vcpu->arch.hv_vapic = data;
1415 break;
1416 }
1417 case HV_X64_MSR_EOI:
1418 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1419 case HV_X64_MSR_ICR:
1420 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1421 case HV_X64_MSR_TPR:
1422 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1423 default:
1424 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1425 "data 0x%llx\n", msr, data);
1426 return 1;
1427 }
1428
1429 return 0;
1430}
1431
1432static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1433{
1434 gpa_t gpa = data & ~0x3f;

	/* Bits 2:5 are reserved and must be zero */
1437 if (data & 0x3c)
1438 return 1;
1439
1440 vcpu->arch.apf.msr_val = data;
1441
1442 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1443 kvm_clear_async_pf_completion_queue(vcpu);
1444 kvm_async_pf_hash_reset(vcpu);
1445 return 0;
1446 }
1447
1448 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
1449 return 1;
1450
1451 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
1452 kvm_async_pf_wakeup_all(vcpu);
1453 return 0;
1454}
1455
1456int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1457{
1458 switch (msr) {
1459 case MSR_EFER:
1460 return set_efer(vcpu, data);
1461 case MSR_K7_HWCR:
		data &= ~(u64)0x40;	/* ignore flush filter disable */
		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
1464 if (data != 0) {
1465 pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
1466 data);
1467 return 1;
1468 }
1469 break;
1470 case MSR_FAM10H_MMIO_CONF_BASE:
1471 if (data != 0) {
1472 pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
1473 "0x%llx\n", data);
1474 return 1;
1475 }
1476 break;
1477 case MSR_AMD64_NB_CFG:
1478 break;
1479 case MSR_IA32_DEBUGCTLMSR:
1480 if (!data) {
			/* We support the non-activated case already */
1482 break;
1483 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/*
			 * Values other than LBR and BTF are vendor-specific,
			 * thus reserved and should throw a #GP.
			 */
1486 return 1;
1487 }
1488 pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
1489 __func__, data);
1490 break;
1491 case MSR_IA32_UCODE_REV:
1492 case MSR_IA32_UCODE_WRITE:
1493 case MSR_VM_HSAVE_PA:
1494 case MSR_AMD64_PATCH_LOADER:
1495 break;
1496 case 0x200 ... 0x2ff:
1497 return set_msr_mtrr(vcpu, msr, data);
1498 case MSR_IA32_APICBASE:
1499 kvm_set_apic_base(vcpu, data);
1500 break;
1501 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1502 return kvm_x2apic_msr_write(vcpu, msr, data);
1503 case MSR_IA32_MISC_ENABLE:
1504 vcpu->arch.ia32_misc_enable_msr = data;
1505 break;
1506 case MSR_KVM_WALL_CLOCK_NEW:
1507 case MSR_KVM_WALL_CLOCK:
1508 vcpu->kvm->arch.wall_clock = data;
1509 kvm_write_wall_clock(vcpu->kvm, data);
1510 break;
1511 case MSR_KVM_SYSTEM_TIME_NEW:
1512 case MSR_KVM_SYSTEM_TIME: {
1513 if (vcpu->arch.time_page) {
1514 kvm_release_page_dirty(vcpu->arch.time_page);
1515 vcpu->arch.time_page = NULL;
1516 }
1517
1518 vcpu->arch.time = data;
1519 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

		/* we verify if the enable bit is set... */
1522 if (!(data & 1))
1523 break;

		/* ...but clean it up before doing the actual write */
1526 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
1527
1528 vcpu->arch.time_page =
1529 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
1530
1531 if (is_error_page(vcpu->arch.time_page)) {
1532 kvm_release_page_clean(vcpu->arch.time_page);
1533 vcpu->arch.time_page = NULL;
1534 }
1535 break;
1536 }
1537 case MSR_KVM_ASYNC_PF_EN:
1538 if (kvm_pv_enable_async_pf(vcpu, data))
1539 return 1;
1540 break;
1541 case MSR_IA32_MCG_CTL:
1542 case MSR_IA32_MCG_STATUS:
1543 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1544 return set_msr_mce(vcpu, msr, data);

	/*
	 * Performance counters are not protected by a CPUID bit, so we should
	 * check all of them in the generic path for the sake of cross vendor
	 * migration.  Writing a zero into the event select MSRs disables them,
	 * which we perfectly emulate; any other value should at least be
	 * reported, since some guests depend on these MSRs.
	 */
1553 case MSR_P6_EVNTSEL0:
1554 case MSR_P6_EVNTSEL1:
1555 case MSR_K7_EVNTSEL0:
1556 case MSR_K7_EVNTSEL1:
1557 case MSR_K7_EVNTSEL2:
1558 case MSR_K7_EVNTSEL3:
1559 if (data != 0)
1560 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1561 "0x%x data 0x%llx\n", msr, data);
1562 break;

	/*
	 * At least RHEL 4 unconditionally writes to the perfctr registers,
	 * so we ignore writes to keep it happy.
	 */
1566 case MSR_P6_PERFCTR0:
1567 case MSR_P6_PERFCTR1:
1568 case MSR_K7_PERFCTR0:
1569 case MSR_K7_PERFCTR1:
1570 case MSR_K7_PERFCTR2:
1571 case MSR_K7_PERFCTR3:
1572 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1573 "0x%x data 0x%llx\n", msr, data);
1574 break;
1575 case MSR_K7_CLK_CTL:
		/*
		 * Ignore all writes to this no longer documented MSR.
		 * Writes are only relevant for old K7 processors,
		 * all pre-dating SVM, but a recommended workaround from
		 * AMD for these chips. It is possible to specify the
		 * affected processor models on the command line, hence
		 * the need to ignore the workaround.
		 */
1584 break;
1585 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1586 if (kvm_hv_msr_partition_wide(msr)) {
1587 int r;
1588 mutex_lock(&vcpu->kvm->lock);
1589 r = set_msr_hyperv_pw(vcpu, msr, data);
1590 mutex_unlock(&vcpu->kvm->lock);
1591 return r;
1592 } else
1593 return set_msr_hyperv(vcpu, msr, data);
1594 break;
1595 default:
1596 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1597 return xen_hvm_config(vcpu, data);
1598 if (!ignore_msrs) {
1599 pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
1600 msr, data);
1601 return 1;
1602 } else {
1603 pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
1604 msr, data);
1605 break;
1606 }
1607 }
1608 return 0;
1609}
1610EXPORT_SYMBOL_GPL(kvm_set_msr_common);
1611
/*
 * Read an MSR value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
1618int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1619{
1620 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
1621}
1622
1623static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1624{
1625 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
1626
1627 if (!msr_mtrr_valid(msr))
1628 return 1;
1629
1630 if (msr == MSR_MTRRdefType)
1631 *pdata = vcpu->arch.mtrr_state.def_type +
1632 (vcpu->arch.mtrr_state.enabled << 10);
1633 else if (msr == MSR_MTRRfix64K_00000)
1634 *pdata = p[0];
1635 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
1636 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
1637 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
1638 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
1639 else if (msr == MSR_IA32_CR_PAT)
1640 *pdata = vcpu->arch.pat;
1641 else {
1642 int idx, is_mtrr_mask;
1643 u64 *pt;
1644
1645 idx = (msr - 0x200) / 2;
1646 is_mtrr_mask = msr - 0x200 - 2 * idx;
1647 if (!is_mtrr_mask)
1648 pt =
1649 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
1650 else
1651 pt =
1652 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
1653 *pdata = *pt;
1654 }
1655
1656 return 0;
1657}
1658
1659static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1660{
1661 u64 data;
1662 u64 mcg_cap = vcpu->arch.mcg_cap;
1663 unsigned bank_num = mcg_cap & 0xff;
1664
1665 switch (msr) {
1666 case MSR_IA32_P5_MC_ADDR:
1667 case MSR_IA32_P5_MC_TYPE:
1668 data = 0;
1669 break;
1670 case MSR_IA32_MCG_CAP:
1671 data = vcpu->arch.mcg_cap;
1672 break;
1673 case MSR_IA32_MCG_CTL:
1674 if (!(mcg_cap & MCG_CTL_P))
1675 return 1;
1676 data = vcpu->arch.mcg_ctl;
1677 break;
1678 case MSR_IA32_MCG_STATUS:
1679 data = vcpu->arch.mcg_status;
1680 break;
1681 default:
1682 if (msr >= MSR_IA32_MC0_CTL &&
1683 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
1684 u32 offset = msr - MSR_IA32_MC0_CTL;
1685 data = vcpu->arch.mce_banks[offset];
1686 break;
1687 }
1688 return 1;
1689 }
1690 *pdata = data;
1691 return 0;
1692}
1693
1694static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1695{
1696 u64 data = 0;
1697 struct kvm *kvm = vcpu->kvm;
1698
1699 switch (msr) {
1700 case HV_X64_MSR_GUEST_OS_ID:
1701 data = kvm->arch.hv_guest_os_id;
1702 break;
1703 case HV_X64_MSR_HYPERCALL:
1704 data = kvm->arch.hv_hypercall;
1705 break;
1706 default:
1707 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1708 return 1;
1709 }
1710
1711 *pdata = data;
1712 return 0;
1713}
1714
1715static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1716{
1717 u64 data = 0;
1718
1719 switch (msr) {
1720 case HV_X64_MSR_VP_INDEX: {
1721 int r;
1722 struct kvm_vcpu *v;
1723 kvm_for_each_vcpu(r, v, vcpu->kvm)
1724 if (v == vcpu)
1725 data = r;
1726 break;
1727 }
1728 case HV_X64_MSR_EOI:
1729 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1730 case HV_X64_MSR_ICR:
1731 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1732 case HV_X64_MSR_TPR:
1733 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1734 default:
1735 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1736 return 1;
1737 }
1738 *pdata = data;
1739 return 0;
1740}
1741
1742int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1743{
1744 u64 data;
1745
1746 switch (msr) {
1747 case MSR_IA32_PLATFORM_ID:
1748 case MSR_IA32_UCODE_REV:
1749 case MSR_IA32_EBL_CR_POWERON:
1750 case MSR_IA32_DEBUGCTLMSR:
1751 case MSR_IA32_LASTBRANCHFROMIP:
1752 case MSR_IA32_LASTBRANCHTOIP:
1753 case MSR_IA32_LASTINTFROMIP:
1754 case MSR_IA32_LASTINTTOIP:
1755 case MSR_K8_SYSCFG:
1756 case MSR_K7_HWCR:
1757 case MSR_VM_HSAVE_PA:
1758 case MSR_P6_PERFCTR0:
1759 case MSR_P6_PERFCTR1:
1760 case MSR_P6_EVNTSEL0:
1761 case MSR_P6_EVNTSEL1:
1762 case MSR_K7_EVNTSEL0:
1763 case MSR_K7_PERFCTR0:
1764 case MSR_K8_INT_PENDING_MSG:
1765 case MSR_AMD64_NB_CFG:
1766 case MSR_FAM10H_MMIO_CONF_BASE:
1767 data = 0;
1768 break;
1769 case MSR_MTRRcap:
1770 data = 0x500 | KVM_NR_VAR_MTRR;
1771 break;
1772 case 0x200 ... 0x2ff:
1773 return get_msr_mtrr(vcpu, msr, pdata);
	case 0xcd: /* fsb frequency */
1775 data = 3;
1776 break;
		/*
		 * MSR_EBC_FREQUENCY_ID
		 * Conservative value valid for even the basic CPU models.
		 * Reporting a core-to-bus frequency ratio of 1 in bits 31:24
		 * keeps guests that divide or multiply by this field from
		 * hitting a divide-by-zero, even though the ratio field is
		 * only architecturally valid for some CPU models.
		 */
1788 case MSR_EBC_FREQUENCY_ID:
1789 data = 1 << 24;
1790 break;
1791 case MSR_IA32_APICBASE:
1792 data = kvm_get_apic_base(vcpu);
1793 break;
1794 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
1795 return kvm_x2apic_msr_read(vcpu, msr, pdata);
1796 break;
1797 case MSR_IA32_MISC_ENABLE:
1798 data = vcpu->arch.ia32_misc_enable_msr;
1799 break;
1800 case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		data = 1000ULL;
		/* CPU multiplier */
		data |= (((uint64_t)4ULL) << 40);
1805 break;
1806 case MSR_EFER:
1807 data = vcpu->arch.efer;
1808 break;
1809 case MSR_KVM_WALL_CLOCK:
1810 case MSR_KVM_WALL_CLOCK_NEW:
1811 data = vcpu->kvm->arch.wall_clock;
1812 break;
1813 case MSR_KVM_SYSTEM_TIME:
1814 case MSR_KVM_SYSTEM_TIME_NEW:
1815 data = vcpu->arch.time;
1816 break;
1817 case MSR_KVM_ASYNC_PF_EN:
1818 data = vcpu->arch.apf.msr_val;
1819 break;
1820 case MSR_IA32_P5_MC_ADDR:
1821 case MSR_IA32_P5_MC_TYPE:
1822 case MSR_IA32_MCG_CAP:
1823 case MSR_IA32_MCG_CTL:
1824 case MSR_IA32_MCG_STATUS:
1825 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1826 return get_msr_mce(vcpu, msr, pdata);
1827 case MSR_K7_CLK_CTL:
		/*
		 * Provide expected ramp-up count for K7. All other
		 * fields are set to zero, indicating minimum divisors
		 * for every field.
		 *
		 * This prevents guest kernels on AMD hosts with CPU
		 * type 6, model 8 and higher from exploding due to
		 * the rdmsr failing.
		 */
1837 data = 0x20000000;
1838 break;
1839 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1840 if (kvm_hv_msr_partition_wide(msr)) {
1841 int r;
1842 mutex_lock(&vcpu->kvm->lock);
1843 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1844 mutex_unlock(&vcpu->kvm->lock);
1845 return r;
1846 } else
1847 return get_msr_hyperv(vcpu, msr, pdata);
1848 break;
1849 default:
1850 if (!ignore_msrs) {
1851 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
1852 return 1;
1853 } else {
1854 pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
1855 data = 0;
1856 }
1857 break;
1858 }
1859 *pdata = data;
1860 return 0;
1861}
1862EXPORT_SYMBOL_GPL(kvm_get_msr_common);
1863
/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
1869static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1870 struct kvm_msr_entry *entries,
1871 int (*do_msr)(struct kvm_vcpu *vcpu,
1872 unsigned index, u64 *data))
1873{
1874 int i, idx;
1875
1876 idx = srcu_read_lock(&vcpu->kvm->srcu);
1877 for (i = 0; i < msrs->nmsrs; ++i)
1878 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1879 break;
1880 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1881
1882 return i;
1883}
1884
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
1890static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
1891 int (*do_msr)(struct kvm_vcpu *vcpu,
1892 unsigned index, u64 *data),
1893 int writeback)
1894{
1895 struct kvm_msrs msrs;
1896 struct kvm_msr_entry *entries;
1897 int r, n;
1898 unsigned size;
1899
1900 r = -EFAULT;
1901 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
1902 goto out;
1903
1904 r = -E2BIG;
1905 if (msrs.nmsrs >= MAX_IO_MSRS)
1906 goto out;
1907
1908 r = -ENOMEM;
1909 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
1910 entries = kmalloc(size, GFP_KERNEL);
1911 if (!entries)
1912 goto out;
1913
1914 r = -EFAULT;
1915 if (copy_from_user(entries, user_msrs->entries, size))
1916 goto out_free;
1917
1918 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
1919 if (r < 0)
1920 goto out_free;
1921
1922 r = -EFAULT;
1923 if (writeback && copy_to_user(user_msrs->entries, entries, size))
1924 goto out_free;
1925
1926 r = n;
1927
1928out_free:
1929 kfree(entries);
1930out:
1931 return r;
1932}
1933
1934int kvm_dev_ioctl_check_extension(long ext)
1935{
1936 int r;
1937
1938 switch (ext) {
1939 case KVM_CAP_IRQCHIP:
1940 case KVM_CAP_HLT:
1941 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
1942 case KVM_CAP_SET_TSS_ADDR:
1943 case KVM_CAP_EXT_CPUID:
1944 case KVM_CAP_CLOCKSOURCE:
1945 case KVM_CAP_PIT:
1946 case KVM_CAP_NOP_IO_DELAY:
1947 case KVM_CAP_MP_STATE:
1948 case KVM_CAP_SYNC_MMU:
1949 case KVM_CAP_USER_NMI:
1950 case KVM_CAP_REINJECT_CONTROL:
1951 case KVM_CAP_IRQ_INJECT_STATUS:
1952 case KVM_CAP_ASSIGN_DEV_IRQ:
1953 case KVM_CAP_IRQFD:
1954 case KVM_CAP_IOEVENTFD:
1955 case KVM_CAP_PIT2:
1956 case KVM_CAP_PIT_STATE2:
1957 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
1958 case KVM_CAP_XEN_HVM:
1959 case KVM_CAP_ADJUST_CLOCK:
1960 case KVM_CAP_VCPU_EVENTS:
1961 case KVM_CAP_HYPERV:
1962 case KVM_CAP_HYPERV_VAPIC:
1963 case KVM_CAP_HYPERV_SPIN:
1964 case KVM_CAP_PCI_SEGMENT:
1965 case KVM_CAP_DEBUGREGS:
1966 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1967 case KVM_CAP_XSAVE:
1968 case KVM_CAP_ASYNC_PF:
1969 r = 1;
1970 break;
1971 case KVM_CAP_COALESCED_MMIO:
1972 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
1973 break;
1974 case KVM_CAP_VAPIC:
1975 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
1976 break;
1977 case KVM_CAP_NR_VCPUS:
1978 r = KVM_MAX_VCPUS;
1979 break;
1980 case KVM_CAP_NR_MEMSLOTS:
1981 r = KVM_MEMORY_SLOTS;
1982 break;
1983 case KVM_CAP_PV_MMU:
1984 r = 0;
1985 break;
1986 case KVM_CAP_IOMMU:
1987 r = iommu_found();
1988 break;
1989 case KVM_CAP_MCE:
1990 r = KVM_MAX_MCE_BANKS;
1991 break;
1992 case KVM_CAP_XCRS:
1993 r = cpu_has_xsave;
1994 break;
1995 default:
1996 r = 0;
1997 break;
1998 }
1999 return r;
2000
2001}
2002
2003long kvm_arch_dev_ioctl(struct file *filp,
2004 unsigned int ioctl, unsigned long arg)
2005{
2006 void __user *argp = (void __user *)arg;
2007 long r;
2008
2009 switch (ioctl) {
2010 case KVM_GET_MSR_INDEX_LIST: {
2011 struct kvm_msr_list __user *user_msr_list = argp;
2012 struct kvm_msr_list msr_list;
2013 unsigned n;
2014
2015 r = -EFAULT;
2016 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2017 goto out;
2018 n = msr_list.nmsrs;
2019 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2020 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2021 goto out;
2022 r = -E2BIG;
2023 if (n < msr_list.nmsrs)
2024 goto out;
2025 r = -EFAULT;
2026 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2027 num_msrs_to_save * sizeof(u32)))
2028 goto out;
2029 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2030 &emulated_msrs,
2031 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2032 goto out;
2033 r = 0;
2034 break;
2035 }
2036 case KVM_GET_SUPPORTED_CPUID: {
2037 struct kvm_cpuid2 __user *cpuid_arg = argp;
2038 struct kvm_cpuid2 cpuid;
2039
2040 r = -EFAULT;
2041 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2042 goto out;
2043 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
2044 cpuid_arg->entries);
2045 if (r)
2046 goto out;
2047
2048 r = -EFAULT;
2049 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2050 goto out;
2051 r = 0;
2052 break;
2053 }
2054 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2055 u64 mce_cap;
2056
2057 mce_cap = KVM_MCE_CAP_SUPPORTED;
2058 r = -EFAULT;
2059 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2060 goto out;
2061 r = 0;
2062 break;
2063 }
2064 default:
2065 r = -EINVAL;
2066 }
2067out:
2068 return r;
2069}
2070
2071static void wbinvd_ipi(void *garbage)
2072{
2073 wbinvd();
2074}
2075
2076static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2077{
2078 return vcpu->kvm->arch.iommu_domain &&
2079 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2080}
2081
2082void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2083{
	/* The guest may execute WBINVD; flush physical caches where needed */
2085 if (need_emulate_wbinvd(vcpu)) {
2086 if (kvm_x86_ops->has_wbinvd_exit())
2087 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2088 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2089 smp_call_function_single(vcpu->cpu,
2090 wbinvd_ipi, NULL, 1);
2091 }
2092
2093 kvm_x86_ops->vcpu_load(vcpu, cpu);
2094 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
		/* Make sure TSC doesn't go backwards */
2096 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2097 native_read_tsc() - vcpu->arch.last_host_tsc;
2098 if (tsc_delta < 0)
2099 mark_tsc_unstable("KVM discovered backwards TSC");
2100 if (check_tsc_unstable()) {
2101 kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
2102 vcpu->arch.tsc_catchup = 1;
2103 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2104 }
2105 if (vcpu->cpu != cpu)
2106 kvm_migrate_timers(vcpu);
2107 vcpu->cpu = cpu;
2108 }
2109}
2110
2111void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2112{
2113 kvm_x86_ops->vcpu_put(vcpu);
2114 kvm_put_guest_fpu(vcpu);
2115 vcpu->arch.last_host_tsc = native_read_tsc();
2116}
2117
2118static int is_efer_nx(void)
2119{
2120 unsigned long long efer = 0;
2121
2122 rdmsrl_safe(MSR_EFER, &efer);
2123 return efer & EFER_NX;
2124}
2125
2126static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
2127{
2128 int i;
2129 struct kvm_cpuid_entry2 *e, *entry;
2130
2131 entry = NULL;
2132 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
2133 e = &vcpu->arch.cpuid_entries[i];
2134 if (e->function == 0x80000001) {
2135 entry = e;
2136 break;
2137 }
2138 }
2139 if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
2140 entry->edx &= ~(1 << 20);
2141 printk(KERN_INFO "kvm: guest NX capability removed\n");
2142 }
2143}
2144
2145
2146static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
2147 struct kvm_cpuid *cpuid,
2148 struct kvm_cpuid_entry __user *entries)
2149{
2150 int r, i;
2151 struct kvm_cpuid_entry *cpuid_entries;
2152
2153 r = -E2BIG;
2154 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2155 goto out;
2156 r = -ENOMEM;
2157 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
2158 if (!cpuid_entries)
2159 goto out;
2160 r = -EFAULT;
2161 if (copy_from_user(cpuid_entries, entries,
2162 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
2163 goto out_free;
2164 for (i = 0; i < cpuid->nent; i++) {
2165 vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
2166 vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
2167 vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
2168 vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
2169 vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
2170 vcpu->arch.cpuid_entries[i].index = 0;
2171 vcpu->arch.cpuid_entries[i].flags = 0;
2172 vcpu->arch.cpuid_entries[i].padding[0] = 0;
2173 vcpu->arch.cpuid_entries[i].padding[1] = 0;
2174 vcpu->arch.cpuid_entries[i].padding[2] = 0;
2175 }
2176 vcpu->arch.cpuid_nent = cpuid->nent;
2177 cpuid_fix_nx_cap(vcpu);
2178 r = 0;
2179 kvm_apic_set_version(vcpu);
2180 kvm_x86_ops->cpuid_update(vcpu);
2181 update_cpuid(vcpu);
2182
2183out_free:
2184 vfree(cpuid_entries);
2185out:
2186 return r;
2187}
2188
2189static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
2190 struct kvm_cpuid2 *cpuid,
2191 struct kvm_cpuid_entry2 __user *entries)
2192{
2193 int r;
2194
2195 r = -E2BIG;
2196 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2197 goto out;
2198 r = -EFAULT;
2199 if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
2200 cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
2201 goto out;
2202 vcpu->arch.cpuid_nent = cpuid->nent;
2203 kvm_apic_set_version(vcpu);
2204 kvm_x86_ops->cpuid_update(vcpu);
2205 update_cpuid(vcpu);
2206 return 0;
2207
2208out:
2209 return r;
2210}
2211
2212static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
2213 struct kvm_cpuid2 *cpuid,
2214 struct kvm_cpuid_entry2 __user *entries)
2215{
2216 int r;
2217
2218 r = -E2BIG;
2219 if (cpuid->nent < vcpu->arch.cpuid_nent)
2220 goto out;
2221 r = -EFAULT;
2222 if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
2223 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
2224 goto out;
2225 return 0;
2226
2227out:
2228 cpuid->nent = vcpu->arch.cpuid_nent;
2229 return r;
2230}
2231
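/*
 * Mask a guest CPUID feature word with the host's capability word so the
 * guest never sees features the host CPU does not have.
 */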
2232static void cpuid_mask(u32 *word, int wordnum)
2233{
2234 *word &= boot_cpu_data.x86_capability[wordnum];
2235}
2236
2237static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2238 u32 index)
2239{
2240 entry->function = function;
2241 entry->index = index;
2242 cpuid_count(entry->function, entry->index,
2243 &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
2244 entry->flags = 0;
2245}
2246
2247#define F(x) bit(X86_FEATURE_##x)
2248
2249static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2250 u32 index, int *nent, int maxnent)
2251{
2252 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
2253#ifdef CONFIG_X86_64
2254 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
2255 ? F(GBPAGES) : 0;
2256 unsigned f_lm = F(LM);
2257#else
2258 unsigned f_gbpages = 0;
2259 unsigned f_lm = 0;
2260#endif
2261 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
2262
2263
2264 const u32 kvm_supported_word0_x86_features =
2265 F(FPU) | F(VME) | F(DE) | F(PSE) |
2266 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
2267 F(CX8) | F(APIC) | 0 | F(SEP) |
2268 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
2269 F(PAT) | F(PSE36) | 0 | F(CLFLSH) |
2270 0 | F(MMX) |
2271 F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
2272 0 ;
2273
2274 const u32 kvm_supported_word1_x86_features =
2275 F(FPU) | F(VME) | F(DE) | F(PSE) |
2276 F(TSC) | F(MSR) | F(PAE) | F(MCE) |
2277 F(CX8) | F(APIC) | 0 | F(SYSCALL) |
2278 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
2279 F(PAT) | F(PSE36) | 0 |
2280 f_nx | 0 | F(MMXEXT) | F(MMX) |
2281 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
2282 0 | f_lm | F(3DNOWEXT) | F(3DNOW);
2283
2284 const u32 kvm_supported_word4_x86_features =
2285 F(XMM3) | F(PCLMULQDQ) | 0 |
2286 0 |
2287 0 | F(SSSE3) | 0 | 0 |
2288 0 | F(CX16) | 0 |
2289 0 | F(XMM4_1) |
2290 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
2291 0 | F(AES) | F(XSAVE) | 0 | F(AVX) |
2292 F(F16C);
2293
2294 const u32 kvm_supported_word6_x86_features =
2295 F(LAHF_LM) | F(CMP_LEGACY) | 0 | 0 |
2296 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
2297 F(3DNOWPREFETCH) | 0 | 0 | F(XOP) |
2298 0 | F(FMA4) | F(TBM);
2299
	/* All calls to cpuid_count() should be made on the same cpu. */
2301 get_cpu();
2302 do_cpuid_1_ent(entry, function, index);
2303 ++*nent;
2304
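	/*
	 * Start from the raw host CPUID values read above, then trim or
	 * augment them per function so the guest only sees what KVM can
	 * actually virtualize.
	 */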
2305 switch (function) {
2306 case 0:
2307 entry->eax = min(entry->eax, (u32)0xd);
2308 break;
2309 case 1:
2310 entry->edx &= kvm_supported_word0_x86_features;
2311 cpuid_mask(&entry->edx, 0);
2312 entry->ecx &= kvm_supported_word4_x86_features;
2313 cpuid_mask(&entry->ecx, 4);
		/*
		 * x2APIC is emulated in software, so advertise it even if
		 * the host CPU does not support it.
		 */
2316 entry->ecx |= F(X2APIC);
2317 break;
	/*
	 * CPUID function 2 is stateful: repeated invocations may return
	 * different values, so the leaf is marked
	 * KVM_CPUID_FLAG_STATEFUL_FUNC and expanded entry by entry below.
	 */
2322 case 2: {
2323 int t, times = entry->eax & 0xff;
2324
2325 entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
2326 entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
2327 for (t = 1; t < times && *nent < maxnent; ++t) {
2328 do_cpuid_1_ent(&entry[t], function, 0);
2329 entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
2330 ++*nent;
2331 }
2332 break;
2333 }
	/* Functions 4, 0xb and 0xd take a significant index in ECX. */
2335 case 4: {
2336 int i, cache_type;
2337
2338 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2339
2340 for (i = 1; *nent < maxnent; ++i) {
2341 cache_type = entry[i - 1].eax & 0x1f;
2342 if (!cache_type)
2343 break;
2344 do_cpuid_1_ent(&entry[i], function, i);
2345 entry[i].flags |=
2346 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2347 ++*nent;
2348 }
2349 break;
2350 }
2351 case 0xb: {
2352 int i, level_type;
2353
2354 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2355
2356 for (i = 1; *nent < maxnent; ++i) {
2357 level_type = entry[i - 1].ecx & 0xff00;
2358 if (!level_type)
2359 break;
2360 do_cpuid_1_ent(&entry[i], function, i);
2361 entry[i].flags |=
2362 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2363 ++*nent;
2364 }
2365 break;
2366 }
2367 case 0xd: {
2368 int i;
2369
2370 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2371 for (i = 1; *nent < maxnent; ++i) {
2372 if (entry[i - 1].eax == 0 && i != 2)
2373 break;
2374 do_cpuid_1_ent(&entry[i], function, i);
2375 entry[i].flags |=
2376 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
2377 ++*nent;
2378 }
2379 break;
2380 }
2381 case KVM_CPUID_SIGNATURE: {
2382 char signature[12] = "KVMKVMKVM\0\0";
2383 u32 *sigptr = (u32 *)signature;
2384 entry->eax = 0;
2385 entry->ebx = sigptr[0];
2386 entry->ecx = sigptr[1];
2387 entry->edx = sigptr[2];
2388 break;
2389 }
2390 case KVM_CPUID_FEATURES:
2391 entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
2392 (1 << KVM_FEATURE_NOP_IO_DELAY) |
2393 (1 << KVM_FEATURE_CLOCKSOURCE2) |
2394 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
2395 entry->ebx = 0;
2396 entry->ecx = 0;
2397 entry->edx = 0;
2398 break;
2399 case 0x80000000:
2400 entry->eax = min(entry->eax, 0x8000001a);
2401 break;
2402 case 0x80000001:
2403 entry->edx &= kvm_supported_word1_x86_features;
2404 cpuid_mask(&entry->edx, 1);
2405 entry->ecx &= kvm_supported_word6_x86_features;
2406 cpuid_mask(&entry->ecx, 6);
2407 break;
2408 }
2409
2410 kvm_x86_ops->set_supported_cpuid(function, entry);
2411
2412 put_cpu();
2413}
2414
2415#undef F
2416
2417static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
2418 struct kvm_cpuid_entry2 __user *entries)
2419{
2420 struct kvm_cpuid_entry2 *cpuid_entries;
2421 int limit, nent = 0, r = -E2BIG;
2422 u32 func;
2423
2424 if (cpuid->nent < 1)
2425 goto out;
2426 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
2427 cpuid->nent = KVM_MAX_CPUID_ENTRIES;
2428 r = -ENOMEM;
2429 cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
2430 if (!cpuid_entries)
2431 goto out;
2432
2433 do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
2434 limit = cpuid_entries[0].eax;
2435 for (func = 1; func <= limit && nent < cpuid->nent; ++func)
2436 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2437 &nent, cpuid->nent);
2438 r = -E2BIG;
2439 if (nent >= cpuid->nent)
2440 goto out_free;
2441
2442 do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
2443 limit = cpuid_entries[nent - 1].eax;
2444 for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
2445 do_cpuid_ent(&cpuid_entries[nent], func, 0,
2446 &nent, cpuid->nent);
2447
	/* Append the KVM paravirtual leaves (signature and features) last. */
2450 r = -E2BIG;
2451 if (nent >= cpuid->nent)
2452 goto out_free;
2453
2454 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_SIGNATURE, 0, &nent,
2455 cpuid->nent);
2456
2457 r = -E2BIG;
2458 if (nent >= cpuid->nent)
2459 goto out_free;
2460
2461 do_cpuid_ent(&cpuid_entries[nent], KVM_CPUID_FEATURES, 0, &nent,
2462 cpuid->nent);
2463
2464 r = -E2BIG;
2465 if (nent >= cpuid->nent)
2466 goto out_free;
2467
2468 r = -EFAULT;
2469 if (copy_to_user(entries, cpuid_entries,
2470 nent * sizeof(struct kvm_cpuid_entry2)))
2471 goto out_free;
2472 cpuid->nent = nent;
2473 r = 0;
2474
2475out_free:
2476 vfree(cpuid_entries);
2477out:
2478 return r;
2479}
2480
2481static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2482 struct kvm_lapic_state *s)
2483{
2484 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2485
2486 return 0;
2487}
2488
2489static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2490 struct kvm_lapic_state *s)
2491{
2492 memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
2493 kvm_apic_post_state_restore(vcpu);
2494 update_cr8_intercept(vcpu);
2495
2496 return 0;
2497}
2498
2499static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2500 struct kvm_interrupt *irq)
2501{
2502 if (irq->irq < 0 || irq->irq >= 256)
2503 return -EINVAL;
2504 if (irqchip_in_kernel(vcpu->kvm))
2505 return -ENXIO;
2506
2507 kvm_queue_interrupt(vcpu, irq->irq, false);
2508 kvm_make_request(KVM_REQ_EVENT, vcpu);
2509
2510 return 0;
2511}
2512
2513static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2514{
2515 kvm_inject_nmi(vcpu);
2516
2517 return 0;
2518}
2519
2520static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2521 struct kvm_tpr_access_ctl *tac)
2522{
2523 if (tac->flags)
2524 return -EINVAL;
2525 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2526 return 0;
2527}
2528
2529static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2530 u64 mcg_cap)
2531{
2532 int r;
2533 unsigned bank_num = mcg_cap & 0xff, bank;
2534
2535 r = -EINVAL;
2536 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2537 goto out;
2538 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2539 goto out;
2540 r = 0;
2541 vcpu->arch.mcg_cap = mcg_cap;
2542
2543 if (mcg_cap & MCG_CTL_P)
2544 vcpu->arch.mcg_ctl = ~(u64)0;
2545
2546 for (bank = 0; bank < bank_num; bank++)
2547 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2548out:
2549 return r;
2550}
2551
2552static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2553 struct kvm_x86_mce *mce)
2554{
2555 u64 mcg_cap = vcpu->arch.mcg_cap;
2556 unsigned bank_num = mcg_cap & 0xff;
2557 u64 *banks = vcpu->arch.mce_banks;
2558
2559 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2560 return -EINVAL;
	/*
	 * If IA32_MCG_CTL is present but not all 1s, uncorrected error
	 * reporting is disabled, so silently ignore the event.
	 */
2565 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2566 vcpu->arch.mcg_ctl != ~(u64)0)
2567 return 0;
2568 banks += 4 * mce->bank;
	/*
	 * If IA32_MCi_CTL for this bank is not all 1s, uncorrected error
	 * reporting is disabled for the bank; ignore the event.
	 */
2573 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2574 return 0;
2575 if (mce->status & MCI_STATUS_UC) {
2576 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2577 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2578 printk(KERN_DEBUG "kvm: set_mce: "
2579 "injects mce exception while "
2580 "previous one is in progress!\n");
2581 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2582 return 0;
2583 }
2584 if (banks[1] & MCI_STATUS_VAL)
2585 mce->status |= MCI_STATUS_OVER;
2586 banks[2] = mce->addr;
2587 banks[3] = mce->misc;
2588 vcpu->arch.mcg_status = mce->mcg_status;
2589 banks[1] = mce->status;
2590 kvm_queue_exception(vcpu, MC_VECTOR);
2591 } else if (!(banks[1] & MCI_STATUS_VAL)
2592 || !(banks[1] & MCI_STATUS_UC)) {
2593 if (banks[1] & MCI_STATUS_VAL)
2594 mce->status |= MCI_STATUS_OVER;
2595 banks[2] = mce->addr;
2596 banks[3] = mce->misc;
2597 banks[1] = mce->status;
2598 } else
2599 banks[1] |= MCI_STATUS_OVER;
2600 return 0;
2601}
2602
2603static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2604 struct kvm_vcpu_events *events)
2605{
2606 events->exception.injected =
2607 vcpu->arch.exception.pending &&
2608 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2609 events->exception.nr = vcpu->arch.exception.nr;
2610 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2611 events->exception.pad = 0;
2612 events->exception.error_code = vcpu->arch.exception.error_code;
2613
2614 events->interrupt.injected =
2615 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2616 events->interrupt.nr = vcpu->arch.interrupt.nr;
2617 events->interrupt.soft = 0;
2618 events->interrupt.shadow =
2619 kvm_x86_ops->get_interrupt_shadow(vcpu,
2620 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2621
2622 events->nmi.injected = vcpu->arch.nmi_injected;
2623 events->nmi.pending = vcpu->arch.nmi_pending;
2624 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2625 events->nmi.pad = 0;
2626
2627 events->sipi_vector = vcpu->arch.sipi_vector;
2628
2629 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2630 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2631 | KVM_VCPUEVENT_VALID_SHADOW);
2632 memset(&events->reserved, 0, sizeof(events->reserved));
2633}
2634
2635static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2636 struct kvm_vcpu_events *events)
2637{
2638 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2639 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2640 | KVM_VCPUEVENT_VALID_SHADOW))
2641 return -EINVAL;
2642
2643 vcpu->arch.exception.pending = events->exception.injected;
2644 vcpu->arch.exception.nr = events->exception.nr;
2645 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2646 vcpu->arch.exception.error_code = events->exception.error_code;
2647
2648 vcpu->arch.interrupt.pending = events->interrupt.injected;
2649 vcpu->arch.interrupt.nr = events->interrupt.nr;
2650 vcpu->arch.interrupt.soft = events->interrupt.soft;
2651 if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm))
2652 kvm_pic_clear_isr_ack(vcpu->kvm);
2653 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2654 kvm_x86_ops->set_interrupt_shadow(vcpu,
2655 events->interrupt.shadow);
2656
2657 vcpu->arch.nmi_injected = events->nmi.injected;
2658 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2659 vcpu->arch.nmi_pending = events->nmi.pending;
2660 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2661
2662 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2663 vcpu->arch.sipi_vector = events->sipi_vector;
2664
2665 kvm_make_request(KVM_REQ_EVENT, vcpu);
2666
2667 return 0;
2668}
2669
2670static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2671 struct kvm_debugregs *dbgregs)
2672{
2673 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2674 dbgregs->dr6 = vcpu->arch.dr6;
2675 dbgregs->dr7 = vcpu->arch.dr7;
2676 dbgregs->flags = 0;
2677 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2678}
2679
2680static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2681 struct kvm_debugregs *dbgregs)
2682{
2683 if (dbgregs->flags)
2684 return -EINVAL;
2685
2686 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2687 vcpu->arch.dr6 = dbgregs->dr6;
2688 vcpu->arch.dr7 = dbgregs->dr7;
2689
2690 return 0;
2691}
2692
2693static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2694 struct kvm_xsave *guest_xsave)
2695{
2696 if (cpu_has_xsave)
2697 memcpy(guest_xsave->region,
2698 &vcpu->arch.guest_fpu.state->xsave,
2699 xstate_size);
2700 else {
2701 memcpy(guest_xsave->region,
2702 &vcpu->arch.guest_fpu.state->fxsave,
2703 sizeof(struct i387_fxsave_struct));
2704 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2705 XSTATE_FPSSE;
2706 }
2707}
2708
2709static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2710 struct kvm_xsave *guest_xsave)
2711{
2712 u64 xstate_bv =
2713 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2714
2715 if (cpu_has_xsave)
2716 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2717 guest_xsave->region, xstate_size);
2718 else {
2719 if (xstate_bv & ~XSTATE_FPSSE)
2720 return -EINVAL;
2721 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
2722 guest_xsave->region, sizeof(struct i387_fxsave_struct));
2723 }
2724 return 0;
2725}
2726
2727static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
2728 struct kvm_xcrs *guest_xcrs)
2729{
2730 if (!cpu_has_xsave) {
2731 guest_xcrs->nr_xcrs = 0;
2732 return;
2733 }
2734
2735 guest_xcrs->nr_xcrs = 1;
2736 guest_xcrs->flags = 0;
2737 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
2738 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
2739}
2740
2741static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2742 struct kvm_xcrs *guest_xcrs)
2743{
2744 int i, r = 0;
2745
2746 if (!cpu_has_xsave)
2747 return -EINVAL;
2748
2749 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
2750 return -EINVAL;
2751
2752 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only XCR0 is supported for now; scan the array for it. */
		if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
					  guest_xcrs->xcrs[i].value);
2757 break;
2758 }
2759 if (r)
2760 r = -EINVAL;
2761 return r;
2762}
2763
2764long kvm_arch_vcpu_ioctl(struct file *filp,
2765 unsigned int ioctl, unsigned long arg)
2766{
2767 struct kvm_vcpu *vcpu = filp->private_data;
2768 void __user *argp = (void __user *)arg;
2769 int r;
2770 union {
2771 struct kvm_lapic_state *lapic;
2772 struct kvm_xsave *xsave;
2773 struct kvm_xcrs *xcrs;
2774 void *buffer;
2775 } u;
2776
2777 u.buffer = NULL;
2778 switch (ioctl) {
2779 case KVM_GET_LAPIC: {
2780 r = -EINVAL;
2781 if (!vcpu->arch.apic)
2782 goto out;
2783 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2784
2785 r = -ENOMEM;
2786 if (!u.lapic)
2787 goto out;
2788 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
2789 if (r)
2790 goto out;
2791 r = -EFAULT;
2792 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
2793 goto out;
2794 r = 0;
2795 break;
2796 }
2797 case KVM_SET_LAPIC: {
2798 r = -EINVAL;
2799 if (!vcpu->arch.apic)
2800 goto out;
2801 u.lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2802 r = -ENOMEM;
2803 if (!u.lapic)
2804 goto out;
2805 r = -EFAULT;
2806 if (copy_from_user(u.lapic, argp, sizeof(struct kvm_lapic_state)))
2807 goto out;
2808 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
2809 if (r)
2810 goto out;
2811 r = 0;
2812 break;
2813 }
2814 case KVM_INTERRUPT: {
2815 struct kvm_interrupt irq;
2816
2817 r = -EFAULT;
2818 if (copy_from_user(&irq, argp, sizeof irq))
2819 goto out;
2820 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
2821 if (r)
2822 goto out;
2823 r = 0;
2824 break;
2825 }
2826 case KVM_NMI: {
2827 r = kvm_vcpu_ioctl_nmi(vcpu);
2828 if (r)
2829 goto out;
2830 r = 0;
2831 break;
2832 }
2833 case KVM_SET_CPUID: {
2834 struct kvm_cpuid __user *cpuid_arg = argp;
2835 struct kvm_cpuid cpuid;
2836
2837 r = -EFAULT;
2838 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2839 goto out;
2840 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
2841 if (r)
2842 goto out;
2843 break;
2844 }
2845 case KVM_SET_CPUID2: {
2846 struct kvm_cpuid2 __user *cpuid_arg = argp;
2847 struct kvm_cpuid2 cpuid;
2848
2849 r = -EFAULT;
2850 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2851 goto out;
2852 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
2853 cpuid_arg->entries);
2854 if (r)
2855 goto out;
2856 break;
2857 }
2858 case KVM_GET_CPUID2: {
2859 struct kvm_cpuid2 __user *cpuid_arg = argp;
2860 struct kvm_cpuid2 cpuid;
2861
2862 r = -EFAULT;
2863 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2864 goto out;
2865 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
2866 cpuid_arg->entries);
2867 if (r)
2868 goto out;
2869 r = -EFAULT;
2870 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2871 goto out;
2872 r = 0;
2873 break;
2874 }
2875 case KVM_GET_MSRS:
2876 r = msr_io(vcpu, argp, kvm_get_msr, 1);
2877 break;
2878 case KVM_SET_MSRS:
2879 r = msr_io(vcpu, argp, do_set_msr, 0);
2880 break;
2881 case KVM_TPR_ACCESS_REPORTING: {
2882 struct kvm_tpr_access_ctl tac;
2883
2884 r = -EFAULT;
2885 if (copy_from_user(&tac, argp, sizeof tac))
2886 goto out;
2887 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
2888 if (r)
2889 goto out;
2890 r = -EFAULT;
2891 if (copy_to_user(argp, &tac, sizeof tac))
2892 goto out;
2893 r = 0;
2894 break;
	}
2896 case KVM_SET_VAPIC_ADDR: {
2897 struct kvm_vapic_addr va;
2898
2899 r = -EINVAL;
2900 if (!irqchip_in_kernel(vcpu->kvm))
2901 goto out;
2902 r = -EFAULT;
2903 if (copy_from_user(&va, argp, sizeof va))
2904 goto out;
2905 r = 0;
2906 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
2907 break;
2908 }
2909 case KVM_X86_SETUP_MCE: {
2910 u64 mcg_cap;
2911
2912 r = -EFAULT;
2913 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
2914 goto out;
2915 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
2916 break;
2917 }
2918 case KVM_X86_SET_MCE: {
2919 struct kvm_x86_mce mce;
2920
2921 r = -EFAULT;
2922 if (copy_from_user(&mce, argp, sizeof mce))
2923 goto out;
2924 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
2925 break;
2926 }
2927 case KVM_GET_VCPU_EVENTS: {
2928 struct kvm_vcpu_events events;
2929
2930 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
2931
2932 r = -EFAULT;
2933 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
2934 break;
2935 r = 0;
2936 break;
2937 }
2938 case KVM_SET_VCPU_EVENTS: {
2939 struct kvm_vcpu_events events;
2940
2941 r = -EFAULT;
2942 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
2943 break;
2944
2945 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
2946 break;
2947 }
2948 case KVM_GET_DEBUGREGS: {
2949 struct kvm_debugregs dbgregs;
2950
2951 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
2952
2953 r = -EFAULT;
2954 if (copy_to_user(argp, &dbgregs,
2955 sizeof(struct kvm_debugregs)))
2956 break;
2957 r = 0;
2958 break;
2959 }
2960 case KVM_SET_DEBUGREGS: {
2961 struct kvm_debugregs dbgregs;
2962
2963 r = -EFAULT;
2964 if (copy_from_user(&dbgregs, argp,
2965 sizeof(struct kvm_debugregs)))
2966 break;
2967
2968 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
2969 break;
2970 }
2971 case KVM_GET_XSAVE: {
2972 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
2973 r = -ENOMEM;
2974 if (!u.xsave)
2975 break;
2976
2977 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
2978
2979 r = -EFAULT;
2980 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
2981 break;
2982 r = 0;
2983 break;
2984 }
2985 case KVM_SET_XSAVE: {
2986 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
2987 r = -ENOMEM;
2988 if (!u.xsave)
2989 break;
2990
2991 r = -EFAULT;
2992 if (copy_from_user(u.xsave, argp, sizeof(struct kvm_xsave)))
2993 break;
2994
2995 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
2996 break;
2997 }
2998 case KVM_GET_XCRS: {
2999 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3000 r = -ENOMEM;
3001 if (!u.xcrs)
3002 break;
3003
3004 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3005
3006 r = -EFAULT;
3007 if (copy_to_user(argp, u.xcrs,
3008 sizeof(struct kvm_xcrs)))
3009 break;
3010 r = 0;
3011 break;
3012 }
3013 case KVM_SET_XCRS: {
3014 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3015 r = -ENOMEM;
3016 if (!u.xcrs)
3017 break;
3018
3019 r = -EFAULT;
3020 if (copy_from_user(u.xcrs, argp,
3021 sizeof(struct kvm_xcrs)))
3022 break;
3023
3024 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3025 break;
3026 }
3027 default:
3028 r = -EINVAL;
3029 }
3030out:
3031 kfree(u.buffer);
3032 return r;
3033}
3034
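/*
 * The TSS area configured here spans three pages; reject an address whose
 * three pages would not fit below the 4GB boundary.
 */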
3035static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3036{
3037 int ret;
3038
3039 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3040 return -1;
3041 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3042 return ret;
3043}
3044
3045static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3046 u64 ident_addr)
3047{
3048 kvm->arch.ept_identity_map_addr = ident_addr;
3049 return 0;
3050}
3051
3052static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3053 u32 kvm_nr_mmu_pages)
3054{
3055 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3056 return -EINVAL;
3057
3058 mutex_lock(&kvm->slots_lock);
3059 spin_lock(&kvm->mmu_lock);
3060
3061 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3062 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3063
3064 spin_unlock(&kvm->mmu_lock);
3065 mutex_unlock(&kvm->slots_lock);
3066 return 0;
3067}
3068
3069static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3070{
3071 return kvm->arch.n_max_mmu_pages;
3072}
3073
3074static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3075{
3076 int r;
3077
3078 r = 0;
3079 switch (chip->chip_id) {
3080 case KVM_IRQCHIP_PIC_MASTER:
3081 memcpy(&chip->chip.pic,
3082 &pic_irqchip(kvm)->pics[0],
3083 sizeof(struct kvm_pic_state));
3084 break;
3085 case KVM_IRQCHIP_PIC_SLAVE:
3086 memcpy(&chip->chip.pic,
3087 &pic_irqchip(kvm)->pics[1],
3088 sizeof(struct kvm_pic_state));
3089 break;
3090 case KVM_IRQCHIP_IOAPIC:
3091 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3092 break;
3093 default:
3094 r = -EINVAL;
3095 break;
3096 }
3097 return r;
3098}
3099
3100static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3101{
3102 int r;
3103
3104 r = 0;
3105 switch (chip->chip_id) {
3106 case KVM_IRQCHIP_PIC_MASTER:
3107 spin_lock(&pic_irqchip(kvm)->lock);
3108 memcpy(&pic_irqchip(kvm)->pics[0],
3109 &chip->chip.pic,
3110 sizeof(struct kvm_pic_state));
3111 spin_unlock(&pic_irqchip(kvm)->lock);
3112 break;
3113 case KVM_IRQCHIP_PIC_SLAVE:
3114 spin_lock(&pic_irqchip(kvm)->lock);
3115 memcpy(&pic_irqchip(kvm)->pics[1],
3116 &chip->chip.pic,
3117 sizeof(struct kvm_pic_state));
3118 spin_unlock(&pic_irqchip(kvm)->lock);
3119 break;
3120 case KVM_IRQCHIP_IOAPIC:
3121 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3122 break;
3123 default:
3124 r = -EINVAL;
3125 break;
3126 }
3127 kvm_pic_update_irq(pic_irqchip(kvm));
3128 return r;
3129}
3130
3131static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3132{
3133 int r = 0;
3134
3135 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3136 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3137 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3138 return r;
3139}
3140
3141static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3142{
3143 int r = 0;
3144
3145 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3146 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3147 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3148 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3149 return r;
3150}
3151
3152static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3153{
3154 int r = 0;
3155
3156 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3157 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3158 sizeof(ps->channels));
3159 ps->flags = kvm->arch.vpit->pit_state.flags;
3160 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3161 memset(&ps->reserved, 0, sizeof(ps->reserved));
3162 return r;
3163}
3164
3165static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3166{
3167 int r = 0, start = 0;
3168 u32 prev_legacy, cur_legacy;
3169 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3170 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3171 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3172 if (!prev_legacy && cur_legacy)
3173 start = 1;
3174 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3175 sizeof(kvm->arch.vpit->pit_state.channels));
3176 kvm->arch.vpit->pit_state.flags = ps->flags;
3177 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3178 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3179 return r;
3180}
3181
3182static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3183 struct kvm_reinject_control *control)
3184{
3185 if (!kvm->arch.vpit)
3186 return -ENXIO;
3187 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3188 kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
3189 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3190 return 0;
3191}
3192
/*
 * Get (and clear) the dirty memory log for a memory slot: swap in a fresh
 * bitmap, write-protect the slot again and copy the old bitmap to userspace.
 */
3196int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
3197 struct kvm_dirty_log *log)
3198{
3199 int r, i;
3200 struct kvm_memory_slot *memslot;
3201 unsigned long n;
3202 unsigned long is_dirty = 0;
3203
3204 mutex_lock(&kvm->slots_lock);
3205
3206 r = -EINVAL;
3207 if (log->slot >= KVM_MEMORY_SLOTS)
3208 goto out;
3209
3210 memslot = &kvm->memslots->memslots[log->slot];
3211 r = -ENOENT;
3212 if (!memslot->dirty_bitmap)
3213 goto out;
3214
3215 n = kvm_dirty_bitmap_bytes(memslot);
3216
3217 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
3218 is_dirty = memslot->dirty_bitmap[i];
3219
	/* If nothing is dirty, don't bother messing with page tables. */
3221 if (is_dirty) {
3222 struct kvm_memslots *slots, *old_slots;
3223 unsigned long *dirty_bitmap;
3224
3225 dirty_bitmap = memslot->dirty_bitmap_head;
3226 if (memslot->dirty_bitmap == dirty_bitmap)
3227 dirty_bitmap += n / sizeof(long);
3228 memset(dirty_bitmap, 0, n);
3229
3230 r = -ENOMEM;
3231 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
3232 if (!slots)
3233 goto out;
3234 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
3235 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
3236 slots->generation++;
3237
3238 old_slots = kvm->memslots;
3239 rcu_assign_pointer(kvm->memslots, slots);
3240 synchronize_srcu_expedited(&kvm->srcu);
3241 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
3242 kfree(old_slots);
3243
3244 spin_lock(&kvm->mmu_lock);
3245 kvm_mmu_slot_remove_write_access(kvm, log->slot);
3246 spin_unlock(&kvm->mmu_lock);
3247
3248 r = -EFAULT;
3249 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
3250 goto out;
3251 } else {
3252 r = -EFAULT;
3253 if (clear_user(log->dirty_bitmap, n))
3254 goto out;
3255 }
3256
3257 r = 0;
3258out:
3259 mutex_unlock(&kvm->slots_lock);
3260 return r;
3261}
3262
3263long kvm_arch_vm_ioctl(struct file *filp,
3264 unsigned int ioctl, unsigned long arg)
3265{
3266 struct kvm *kvm = filp->private_data;
3267 void __user *argp = (void __user *)arg;
3268 int r = -ENOTTY;
3269
	/*
	 * This union makes it explicit to the compiler that the stack usage
	 * of these structures should be combined, not added together.
	 */
3274 union {
3275 struct kvm_pit_state ps;
3276 struct kvm_pit_state2 ps2;
3277 struct kvm_pit_config pit_config;
3278 } u;
3279
3280 switch (ioctl) {
3281 case KVM_SET_TSS_ADDR:
3282 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3283 if (r < 0)
3284 goto out;
3285 break;
3286 case KVM_SET_IDENTITY_MAP_ADDR: {
3287 u64 ident_addr;
3288
3289 r = -EFAULT;
3290 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3291 goto out;
3292 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3293 if (r < 0)
3294 goto out;
3295 break;
3296 }
3297 case KVM_SET_NR_MMU_PAGES:
3298 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3299 if (r)
3300 goto out;
3301 break;
3302 case KVM_GET_NR_MMU_PAGES:
3303 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3304 break;
3305 case KVM_CREATE_IRQCHIP: {
3306 struct kvm_pic *vpic;
3307
3308 mutex_lock(&kvm->lock);
3309 r = -EEXIST;
3310 if (kvm->arch.vpic)
3311 goto create_irqchip_unlock;
3312 r = -ENOMEM;
3313 vpic = kvm_create_pic(kvm);
3314 if (vpic) {
3315 r = kvm_ioapic_init(kvm);
3316 if (r) {
3317 mutex_lock(&kvm->slots_lock);
3318 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3319 &vpic->dev);
3320 mutex_unlock(&kvm->slots_lock);
3321 kfree(vpic);
3322 goto create_irqchip_unlock;
3323 }
3324 } else
3325 goto create_irqchip_unlock;
3326 smp_wmb();
3327 kvm->arch.vpic = vpic;
3328 smp_wmb();
3329 r = kvm_setup_default_irq_routing(kvm);
3330 if (r) {
3331 mutex_lock(&kvm->slots_lock);
3332 mutex_lock(&kvm->irq_lock);
3333 kvm_ioapic_destroy(kvm);
3334 kvm_destroy_pic(kvm);
3335 mutex_unlock(&kvm->irq_lock);
3336 mutex_unlock(&kvm->slots_lock);
3337 }
3338 create_irqchip_unlock:
3339 mutex_unlock(&kvm->lock);
3340 break;
3341 }
3342 case KVM_CREATE_PIT:
3343 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3344 goto create_pit;
3345 case KVM_CREATE_PIT2:
3346 r = -EFAULT;
3347 if (copy_from_user(&u.pit_config, argp,
3348 sizeof(struct kvm_pit_config)))
3349 goto out;
3350 create_pit:
3351 mutex_lock(&kvm->slots_lock);
3352 r = -EEXIST;
3353 if (kvm->arch.vpit)
3354 goto create_pit_unlock;
3355 r = -ENOMEM;
3356 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3357 if (kvm->arch.vpit)
3358 r = 0;
3359 create_pit_unlock:
3360 mutex_unlock(&kvm->slots_lock);
3361 break;
3362 case KVM_IRQ_LINE_STATUS:
3363 case KVM_IRQ_LINE: {
3364 struct kvm_irq_level irq_event;
3365
3366 r = -EFAULT;
3367 if (copy_from_user(&irq_event, argp, sizeof irq_event))
3368 goto out;
3369 r = -ENXIO;
3370 if (irqchip_in_kernel(kvm)) {
3371 __s32 status;
3372 status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3373 irq_event.irq, irq_event.level);
3374 if (ioctl == KVM_IRQ_LINE_STATUS) {
3375 r = -EFAULT;
3376 irq_event.status = status;
3377 if (copy_to_user(argp, &irq_event,
3378 sizeof irq_event))
3379 goto out;
3380 }
3381 r = 0;
3382 }
3383 break;
3384 }
3385 case KVM_GET_IRQCHIP: {
		/* chip_id selects the PIC master, the PIC slave or the IOAPIC. */
3387 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3388
3389 r = -ENOMEM;
3390 if (!chip)
3391 goto out;
3392 r = -EFAULT;
3393 if (copy_from_user(chip, argp, sizeof *chip))
3394 goto get_irqchip_out;
3395 r = -ENXIO;
3396 if (!irqchip_in_kernel(kvm))
3397 goto get_irqchip_out;
3398 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3399 if (r)
3400 goto get_irqchip_out;
3401 r = -EFAULT;
3402 if (copy_to_user(argp, chip, sizeof *chip))
3403 goto get_irqchip_out;
3404 r = 0;
3405 get_irqchip_out:
3406 kfree(chip);
3407 if (r)
3408 goto out;
3409 break;
3410 }
3411 case KVM_SET_IRQCHIP: {
3412
3413 struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
3414
3415 r = -ENOMEM;
3416 if (!chip)
3417 goto out;
3418 r = -EFAULT;
3419 if (copy_from_user(chip, argp, sizeof *chip))
3420 goto set_irqchip_out;
3421 r = -ENXIO;
3422 if (!irqchip_in_kernel(kvm))
3423 goto set_irqchip_out;
3424 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3425 if (r)
3426 goto set_irqchip_out;
3427 r = 0;
3428 set_irqchip_out:
3429 kfree(chip);
3430 if (r)
3431 goto out;
3432 break;
3433 }
3434 case KVM_GET_PIT: {
3435 r = -EFAULT;
3436 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3437 goto out;
3438 r = -ENXIO;
3439 if (!kvm->arch.vpit)
3440 goto out;
3441 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3442 if (r)
3443 goto out;
3444 r = -EFAULT;
3445 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3446 goto out;
3447 r = 0;
3448 break;
3449 }
3450 case KVM_SET_PIT: {
3451 r = -EFAULT;
3452 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3453 goto out;
3454 r = -ENXIO;
3455 if (!kvm->arch.vpit)
3456 goto out;
3457 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3458 if (r)
3459 goto out;
3460 r = 0;
3461 break;
3462 }
3463 case KVM_GET_PIT2: {
3464 r = -ENXIO;
3465 if (!kvm->arch.vpit)
3466 goto out;
3467 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3468 if (r)
3469 goto out;
3470 r = -EFAULT;
3471 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3472 goto out;
3473 r = 0;
3474 break;
3475 }
3476 case KVM_SET_PIT2: {
3477 r = -EFAULT;
3478 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3479 goto out;
3480 r = -ENXIO;
3481 if (!kvm->arch.vpit)
3482 goto out;
3483 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3484 if (r)
3485 goto out;
3486 r = 0;
3487 break;
3488 }
3489 case KVM_REINJECT_CONTROL: {
3490 struct kvm_reinject_control control;
3491 r = -EFAULT;
3492 if (copy_from_user(&control, argp, sizeof(control)))
3493 goto out;
3494 r = kvm_vm_ioctl_reinject(kvm, &control);
3495 if (r)
3496 goto out;
3497 r = 0;
3498 break;
3499 }
3500 case KVM_XEN_HVM_CONFIG: {
3501 r = -EFAULT;
3502 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3503 sizeof(struct kvm_xen_hvm_config)))
3504 goto out;
3505 r = -EINVAL;
3506 if (kvm->arch.xen_hvm_config.flags)
3507 goto out;
3508 r = 0;
3509 break;
3510 }
3511 case KVM_SET_CLOCK: {
3512 struct kvm_clock_data user_ns;
3513 u64 now_ns;
3514 s64 delta;
3515
3516 r = -EFAULT;
3517 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3518 goto out;
3519
3520 r = -EINVAL;
3521 if (user_ns.flags)
3522 goto out;
3523
3524 r = 0;
3525 local_irq_disable();
3526 now_ns = get_kernel_ns();
3527 delta = user_ns.clock - now_ns;
3528 local_irq_enable();
3529 kvm->arch.kvmclock_offset = delta;
3530 break;
3531 }
3532 case KVM_GET_CLOCK: {
3533 struct kvm_clock_data user_ns;
3534 u64 now_ns;
3535
3536 local_irq_disable();
3537 now_ns = get_kernel_ns();
3538 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3539 local_irq_enable();
3540 user_ns.flags = 0;
3541 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3542
3543 r = -EFAULT;
3544 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3545 goto out;
3546 r = 0;
3547 break;
3548 }
3549
3550 default:
3551 ;
3552 }
3553out:
3554 return r;
3555}
3556
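/*
 * Trim msrs_to_save down to the MSRs that actually exist on this host,
 * probing each one with rdmsr_safe().
 */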
3557static void kvm_init_msr_list(void)
3558{
3559 u32 dummy[2];
3560 unsigned i, j;
3561
	/* Skip the KVM-specific MSRs at the head of the list. */
3563 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3564 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3565 continue;
3566 if (j < i)
3567 msrs_to_save[j] = msrs_to_save[i];
3568 j++;
3569 }
3570 num_msrs_to_save = j;
3571}
3572
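/*
 * MMIO accesses are offered to the in-kernel local APIC first; anything it
 * does not claim is forwarded to the MMIO bus.
 */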
3573static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3574 const void *v)
3575{
3576 if (vcpu->arch.apic &&
3577 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
3578 return 0;
3579
3580 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3581}
3582
3583static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3584{
3585 if (vcpu->arch.apic &&
3586 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
3587 return 0;
3588
3589 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
3590}
3591
3592static void kvm_set_segment(struct kvm_vcpu *vcpu,
3593 struct kvm_segment *var, int seg)
3594{
3595 kvm_x86_ops->set_segment(vcpu, var, seg);
3596}
3597
3598void kvm_get_segment(struct kvm_vcpu *vcpu,
3599 struct kvm_segment *var, int seg)
3600{
3601 kvm_x86_ops->get_segment(vcpu, var, seg);
3602}
3603
3604static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3605{
3606 return gpa;
3607}
3608
3609static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3610{
3611 gpa_t t_gpa;
3612 struct x86_exception exception;
3613
3614 BUG_ON(!mmu_is_nested(vcpu));
3615
	/* The nested walk is always treated as a user-mode access. */
3617 access |= PFERR_USER_MASK;
3618 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3619
3620 return t_gpa;
3621}
3622
3623gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3624 struct x86_exception *exception)
3625{
3626 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3627 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3628}
3629
3630 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3631 struct x86_exception *exception)
3632{
3633 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3634 access |= PFERR_FETCH_MASK;
3635 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3636}
3637
3638gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3639 struct x86_exception *exception)
3640{
3641 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3642 access |= PFERR_WRITE_MASK;
3643 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3644}
3645
/* Access guest memory without CPL checks; used for system-level accesses. */
3647gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3648 struct x86_exception *exception)
3649{
3650 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3651}
3652
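/*
 * Copy guest-virtual memory a page at a time: translate each page with the
 * active walk_mmu, propagate faults, and return X86EMUL_IO_NEEDED if the
 * backing memory cannot be read.
 */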
3653static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3654 struct kvm_vcpu *vcpu, u32 access,
3655 struct x86_exception *exception)
3656{
3657 void *data = val;
3658 int r = X86EMUL_CONTINUE;
3659
3660 while (bytes) {
3661 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3662 exception);
3663 unsigned offset = addr & (PAGE_SIZE-1);
3664 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3665 int ret;
3666
3667 if (gpa == UNMAPPED_GVA)
3668 return X86EMUL_PROPAGATE_FAULT;
3669 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3670 if (ret < 0) {
3671 r = X86EMUL_IO_NEEDED;
3672 goto out;
3673 }
3674
3675 bytes -= toread;
3676 data += toread;
3677 addr += toread;
3678 }
3679out:
3680 return r;
3681}
3682
/* Used for instruction fetching. */
3684static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3685 struct kvm_vcpu *vcpu,
3686 struct x86_exception *exception)
3687{
3688 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3689 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3690 access | PFERR_FETCH_MASK,
3691 exception);
3692}
3693
3694static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3695 struct kvm_vcpu *vcpu,
3696 struct x86_exception *exception)
3697{
3698 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3699 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3700 exception);
3701}
3702
3703static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3704 struct kvm_vcpu *vcpu,
3705 struct x86_exception *exception)
3706{
3707 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
3708}
3709
3710static int kvm_write_guest_virt_system(gva_t addr, void *val,
3711 unsigned int bytes,
3712 struct kvm_vcpu *vcpu,
3713 struct x86_exception *exception)
3714{
3715 void *data = val;
3716 int r = X86EMUL_CONTINUE;
3717
3718 while (bytes) {
3719 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
3720 PFERR_WRITE_MASK,
3721 exception);
3722 unsigned offset = addr & (PAGE_SIZE-1);
3723 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3724 int ret;
3725
3726 if (gpa == UNMAPPED_GVA)
3727 return X86EMUL_PROPAGATE_FAULT;
3728 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3729 if (ret < 0) {
3730 r = X86EMUL_IO_NEEDED;
3731 goto out;
3732 }
3733
3734 bytes -= towrite;
3735 data += towrite;
3736 addr += towrite;
3737 }
3738out:
3739 return r;
3740}
3741
3742static int emulator_read_emulated(unsigned long addr,
3743 void *val,
3744 unsigned int bytes,
3745 struct x86_exception *exception,
3746 struct kvm_vcpu *vcpu)
3747{
3748 gpa_t gpa;
3749
3750 if (vcpu->mmio_read_completed) {
3751 memcpy(val, vcpu->mmio_data, bytes);
3752 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
3753 vcpu->mmio_phys_addr, *(u64 *)val);
3754 vcpu->mmio_read_completed = 0;
3755 return X86EMUL_CONTINUE;
3756 }
3757
3758 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, exception);
3759
3760 if (gpa == UNMAPPED_GVA)
3761 return X86EMUL_PROPAGATE_FAULT;
3762
	/* Accesses to the APIC page always go through the MMIO path. */
3764 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3765 goto mmio;
3766
3767 if (kvm_read_guest_virt(addr, val, bytes, vcpu, exception)
3768 == X86EMUL_CONTINUE)
3769 return X86EMUL_CONTINUE;
3770
3771mmio:
	/*
	 * Is this MMIO handled by an in-kernel device?
	 */
3775 if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
3776 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
3777 return X86EMUL_CONTINUE;
3778 }
3779
3780 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
3781
3782 vcpu->mmio_needed = 1;
3783 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3784 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3785 vcpu->run->mmio.len = vcpu->mmio_size = bytes;
3786 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
3787
3788 return X86EMUL_IO_NEEDED;
3789}
3790
3791int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
3792 const void *val, int bytes)
3793{
3794 int ret;
3795
3796 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
3797 if (ret < 0)
3798 return 0;
3799 kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
3800 return 1;
3801}
3802
3803static int emulator_write_emulated_onepage(unsigned long addr,
3804 const void *val,
3805 unsigned int bytes,
3806 struct x86_exception *exception,
3807 struct kvm_vcpu *vcpu)
3808{
3809 gpa_t gpa;
3810
3811 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
3812
3813 if (gpa == UNMAPPED_GVA)
3814 return X86EMUL_PROPAGATE_FAULT;
3815
3816
3817 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3818 goto mmio;
3819
3820 if (emulator_write_phys(vcpu, gpa, val, bytes))
3821 return X86EMUL_CONTINUE;
3822
3823mmio:
3824 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
3825
3826
3827
3828 if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
3829 return X86EMUL_CONTINUE;
3830
3831 vcpu->mmio_needed = 1;
3832 vcpu->run->exit_reason = KVM_EXIT_MMIO;
3833 vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
3834 vcpu->run->mmio.len = vcpu->mmio_size = bytes;
3835 vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
3836 memcpy(vcpu->run->mmio.data, val, bytes);
3837
3838 return X86EMUL_CONTINUE;
3839}
3840
3841int emulator_write_emulated(unsigned long addr,
3842 const void *val,
3843 unsigned int bytes,
3844 struct x86_exception *exception,
3845 struct kvm_vcpu *vcpu)
3846{
	/* Does the write cross a page boundary? If so, split it. */
3848 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
3849 int rc, now;
3850
3851 now = -addr & ~PAGE_MASK;
3852 rc = emulator_write_emulated_onepage(addr, val, now, exception,
3853 vcpu);
3854 if (rc != X86EMUL_CONTINUE)
3855 return rc;
3856 addr += now;
3857 val += now;
3858 bytes -= now;
3859 }
3860 return emulator_write_emulated_onepage(addr, val, bytes, exception,
3861 vcpu);
3862}
3863
3864#define CMPXCHG_TYPE(t, ptr, old, new) \
3865 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
3866
3867#ifdef CONFIG_X86_64
3868# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
3869#else
3870# define CMPXCHG64(ptr, old, new) \
3871 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
3872#endif
3873
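/*
 * Emulate cmpxchg by mapping the guest page and performing a real atomic
 * cmpxchg on it; if that is not possible (unsupported size, MMIO, or a
 * cross-page access), fall back to a plain emulated write and warn once.
 */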
3874static int emulator_cmpxchg_emulated(unsigned long addr,
3875 const void *old,
3876 const void *new,
3877 unsigned int bytes,
3878 struct x86_exception *exception,
3879 struct kvm_vcpu *vcpu)
3880{
3881 gpa_t gpa;
3882 struct page *page;
3883 char *kaddr;
3884 bool exchanged;
3885
	/* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */
3887 if (bytes > 8 || (bytes & (bytes - 1)))
3888 goto emul_write;
3889
3890 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
3891
3892 if (gpa == UNMAPPED_GVA ||
3893 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3894 goto emul_write;
3895
3896 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
3897 goto emul_write;
3898
3899 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
3900 if (is_error_page(page)) {
3901 kvm_release_page_clean(page);
3902 goto emul_write;
3903 }
3904
3905 kaddr = kmap_atomic(page, KM_USER0);
3906 kaddr += offset_in_page(gpa);
3907 switch (bytes) {
3908 case 1:
3909 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
3910 break;
3911 case 2:
3912 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
3913 break;
3914 case 4:
3915 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
3916 break;
3917 case 8:
3918 exchanged = CMPXCHG64(kaddr, old, new);
3919 break;
3920 default:
3921 BUG();
3922 }
3923 kunmap_atomic(kaddr, KM_USER0);
3924 kvm_release_page_dirty(page);
3925
3926 if (!exchanged)
3927 return X86EMUL_CMPXCHG_FAILED;
3928
3929 kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1);
3930
3931 return X86EMUL_CONTINUE;
3932
3933emul_write:
3934 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
3935
3936 return emulator_write_emulated(addr, new, bytes, exception, vcpu);
3937}
3938
3939static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3940{
3941
3942 int r;
3943
3944 if (vcpu->arch.pio.in)
3945 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3946 vcpu->arch.pio.size, pd);
3947 else
3948 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3949 vcpu->arch.pio.port, vcpu->arch.pio.size,
3950 pd);
3951 return r;
3952}
3953
3954
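/*
 * PIO-in either completes immediately (from an in-kernel device, or from
 * data a previous userspace exit left in pio_data) or fills in a
 * KVM_EXIT_IO request and returns 0 so userspace can satisfy it.
 */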
3955static int emulator_pio_in_emulated(int size, unsigned short port, void *val,
3956 unsigned int count, struct kvm_vcpu *vcpu)
3957{
3958 if (vcpu->arch.pio.count)
3959 goto data_avail;
3960
3961 trace_kvm_pio(0, port, size, count);
3962
3963 vcpu->arch.pio.port = port;
3964 vcpu->arch.pio.in = 1;
3965 vcpu->arch.pio.count = count;
3966 vcpu->arch.pio.size = size;
3967
3968 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3969 data_avail:
3970 memcpy(val, vcpu->arch.pio_data, size * count);
3971 vcpu->arch.pio.count = 0;
3972 return 1;
3973 }
3974
3975 vcpu->run->exit_reason = KVM_EXIT_IO;
3976 vcpu->run->io.direction = KVM_EXIT_IO_IN;
3977 vcpu->run->io.size = size;
3978 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
3979 vcpu->run->io.count = count;
3980 vcpu->run->io.port = port;
3981
3982 return 0;
3983}
3984
3985static int emulator_pio_out_emulated(int size, unsigned short port,
3986 const void *val, unsigned int count,
3987 struct kvm_vcpu *vcpu)
3988{
3989 trace_kvm_pio(1, port, size, count);
3990
3991 vcpu->arch.pio.port = port;
3992 vcpu->arch.pio.in = 0;
3993 vcpu->arch.pio.count = count;
3994 vcpu->arch.pio.size = size;
3995
3996 memcpy(vcpu->arch.pio_data, val, size * count);
3997
3998 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3999 vcpu->arch.pio.count = 0;
4000 return 1;
4001 }
4002
4003 vcpu->run->exit_reason = KVM_EXIT_IO;
4004 vcpu->run->io.direction = KVM_EXIT_IO_OUT;
4005 vcpu->run->io.size = size;
4006 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4007 vcpu->run->io.count = count;
4008 vcpu->run->io.port = port;
4009
4010 return 0;
4011}
4012
4013static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4014{
4015 return kvm_x86_ops->get_segment_base(vcpu, seg);
4016}
4017
4018int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
4019{
4020 kvm_mmu_invlpg(vcpu, address);
4021 return X86EMUL_CONTINUE;
4022}
4023
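/*
 * WBINVD from the guest: if the CPU exits on WBINVD, flush caches only on
 * the physical CPUs this vCPU has recently run on (wbinvd_dirty_mask);
 * otherwise just execute wbinvd on the current CPU.
 */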
4024int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4025{
4026 if (!need_emulate_wbinvd(vcpu))
4027 return X86EMUL_CONTINUE;
4028
4029 if (kvm_x86_ops->has_wbinvd_exit()) {
4030 int cpu = get_cpu();
4031
4032 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4033 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4034 wbinvd_ipi, NULL, 1);
4035 put_cpu();
4036 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4037 } else
4038 wbinvd();
4039 return X86EMUL_CONTINUE;
4040}
4041EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4042
4043int emulate_clts(struct kvm_vcpu *vcpu)
4044{
4045 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
4046 kvm_x86_ops->fpu_activate(vcpu);
4047 return X86EMUL_CONTINUE;
4048}
4049
4050int emulator_get_dr(int dr, unsigned long *dest, struct kvm_vcpu *vcpu)
4051{
4052 return _kvm_get_dr(vcpu, dr, dest);
4053}
4054
4055int emulator_set_dr(int dr, unsigned long value, struct kvm_vcpu *vcpu)
4056{
4057
4058 return __kvm_set_dr(vcpu, dr, value);
4059}
4060
4061static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4062{
4063 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4064}
4065
4066static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu)
4067{
4068 unsigned long value;
4069
4070 switch (cr) {
4071 case 0:
4072 value = kvm_read_cr0(vcpu);
4073 break;
4074 case 2:
4075 value = vcpu->arch.cr2;
4076 break;
4077 case 3:
4078 value = kvm_read_cr3(vcpu);
4079 break;
4080 case 4:
4081 value = kvm_read_cr4(vcpu);
4082 break;
4083 case 8:
4084 value = kvm_get_cr8(vcpu);
4085 break;
4086 default:
4087 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4088 return 0;
4089 }
4090
4091 return value;
4092}
4093
4094static int emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu)
4095{
4096 int res = 0;
4097
4098 switch (cr) {
4099 case 0:
4100 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4101 break;
4102 case 2:
4103 vcpu->arch.cr2 = val;
4104 break;
4105 case 3:
4106 res = kvm_set_cr3(vcpu, val);
4107 break;
4108 case 4:
4109 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4110 break;
4111 case 8:
4112 res = kvm_set_cr8(vcpu, val);
4113 break;
4114 default:
4115 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
4116 res = -1;
4117 }
4118
4119 return res;
4120}
4121
4122static int emulator_get_cpl(struct kvm_vcpu *vcpu)
4123{
4124 return kvm_x86_ops->get_cpl(vcpu);
4125}
4126
4127static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
4128{
4129 kvm_x86_ops->get_gdt(vcpu, dt);
4130}
4131
4132static void emulator_get_idt(struct desc_ptr *dt, struct kvm_vcpu *vcpu)
4133{
4134 kvm_x86_ops->get_idt(vcpu, dt);
4135}
4136
4137static unsigned long emulator_get_cached_segment_base(int seg,
4138 struct kvm_vcpu *vcpu)
4139{
4140 return get_segment_base(vcpu, seg);
4141}
4142
4143static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg,
4144 struct kvm_vcpu *vcpu)
4145{
4146 struct kvm_segment var;
4147
4148 kvm_get_segment(vcpu, &var, seg);
4149
4150 if (var.unusable)
4151 return false;
4152
4153 if (var.g)
4154 var.limit >>= 12;
4155 set_desc_limit(desc, var.limit);
4156 set_desc_base(desc, (unsigned long)var.base);
4157 desc->type = var.type;
4158 desc->s = var.s;
4159 desc->dpl = var.dpl;
4160 desc->p = var.present;
4161 desc->avl = var.avl;
4162 desc->l = var.l;
4163 desc->d = var.db;
4164 desc->g = var.g;
4165
4166 return true;
4167}
4168
4169static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg,
4170 struct kvm_vcpu *vcpu)
4171{
4172 struct kvm_segment var;
4173
4174
4175 kvm_get_segment(vcpu, &var, seg);
4176
4177 var.base = get_desc_base(desc);
4178 var.limit = get_desc_limit(desc);
4179 if (desc->g)
4180 var.limit = (var.limit << 12) | 0xfff;
4181 var.type = desc->type;
4182 var.present = desc->p;
4183 var.dpl = desc->dpl;
4184 var.db = desc->d;
4185 var.s = desc->s;
4186 var.l = desc->l;
4187 var.g = desc->g;
4188 var.avl = desc->avl;
4190 var.unusable = !var.present;
4191 var.padding = 0;
4192
4193 kvm_set_segment(vcpu, &var, seg);
4194 return;
4195}
4196
4197static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu)
4198{
4199 struct kvm_segment kvm_seg;
4200
4201 kvm_get_segment(vcpu, &kvm_seg, seg);
4202 return kvm_seg.selector;
4203}
4204
4205static void emulator_set_segment_selector(u16 sel, int seg,
4206 struct kvm_vcpu *vcpu)
4207{
4208 struct kvm_segment kvm_seg;
4209
4210 kvm_get_segment(vcpu, &kvm_seg, seg);
4211 kvm_seg.selector = sel;
4212 kvm_set_segment(vcpu, &kvm_seg, seg);
4213}
4214
4215static struct x86_emulate_ops emulate_ops = {
4216 .read_std = kvm_read_guest_virt_system,
4217 .write_std = kvm_write_guest_virt_system,
4218 .fetch = kvm_fetch_guest_virt,
4219 .read_emulated = emulator_read_emulated,
4220 .write_emulated = emulator_write_emulated,
4221 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4222 .pio_in_emulated = emulator_pio_in_emulated,
4223 .pio_out_emulated = emulator_pio_out_emulated,
4224 .get_cached_descriptor = emulator_get_cached_descriptor,
4225 .set_cached_descriptor = emulator_set_cached_descriptor,
4226 .get_segment_selector = emulator_get_segment_selector,
4227 .set_segment_selector = emulator_set_segment_selector,
4228 .get_cached_segment_base = emulator_get_cached_segment_base,
4229 .get_gdt = emulator_get_gdt,
4230 .get_idt = emulator_get_idt,
4231 .get_cr = emulator_get_cr,
4232 .set_cr = emulator_set_cr,
4233 .cpl = emulator_get_cpl,
4234 .get_dr = emulator_get_dr,
4235 .set_dr = emulator_set_dr,
4236 .set_msr = kvm_set_msr,
4237 .get_msr = kvm_get_msr,
4238};
4239
4240static void cache_all_regs(struct kvm_vcpu *vcpu)
4241{
4242 kvm_register_read(vcpu, VCPU_REGS_RAX);
4243 kvm_register_read(vcpu, VCPU_REGS_RSP);
4244 kvm_register_read(vcpu, VCPU_REGS_RIP);
4245 vcpu->arch.regs_dirty = ~0;
4246}
4247
4248static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4249{
4250 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4251
	/*
	 * Update the hardware interrupt shadow only when it does not already
	 * cover the bits the emulator is requesting; a mask of 0 always goes
	 * through and clears the shadow.
	 */
4258 if (!(int_shadow & mask))
4259 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4260}
4261
4262static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4263{
4264 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4265 if (ctxt->exception.vector == PF_VECTOR)
4266 kvm_propagate_fault(vcpu, &ctxt->exception);
4267 else if (ctxt->exception.error_code_valid)
4268 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4269 ctxt->exception.error_code);
4270 else
4271 kvm_queue_exception(vcpu, ctxt->exception.vector);
4272}
4273
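/*
 * Seed the emulator context from current vCPU state: EFLAGS, RIP and the
 * CPU mode derived from CR0.PE, EFLAGS.VM and the CS.L/CS.DB bits.
 */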
4274static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4275{
4276 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4277 int cs_db, cs_l;
4278
4279 cache_all_regs(vcpu);
4280
4281 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4282
4283 vcpu->arch.emulate_ctxt.vcpu = vcpu;
4284 vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
4285 vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
4286 vcpu->arch.emulate_ctxt.mode =
4287 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4288 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
4289 ? X86EMUL_MODE_VM86 : cs_l
4290 ? X86EMUL_MODE_PROT64 : cs_db
4291 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
4292 memset(c, 0, sizeof(struct decode_cache));
4293 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4294}
4295
4296int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq)
4297{
4298 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4299 int ret;
4300
4301 init_emulate_ctxt(vcpu);
4302
4303 vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
4304 vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
4305 vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip;
4306 ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
4307
4308 if (ret != X86EMUL_CONTINUE)
4309 return EMULATE_FAIL;
4310
4311 vcpu->arch.emulate_ctxt.eip = c->eip;
4312 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
4313 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
4314 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4315
4316 if (irq == NMI_VECTOR)
4317 vcpu->arch.nmi_pending = false;
4318 else
4319 vcpu->arch.interrupt.pending = false;
4320
4321 return EMULATE_DONE;
4322}
4323EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4324
4325static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4326{
4327 int r = EMULATE_DONE;
4328
4329 ++vcpu->stat.insn_emulation_fail;
4330 trace_kvm_emulate_insn_failed(vcpu);
4331 if (!is_guest_mode(vcpu)) {
4332 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4333 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4334 vcpu->run->internal.ndata = 0;
4335 r = EMULATE_FAIL;
4336 }
4337 kvm_queue_exception(vcpu, UD_VECTOR);
4338
4339 return r;
4340}
4341
4342static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4343{
4344 gpa_t gpa;
4345
4346 if (tdp_enabled)
4347 return false;
4348
	/*
	 * If emulation was triggered by a write to a shadowed page table,
	 * try to unshadow the page and re-enter the guest so the CPU can
	 * execute the instruction itself.
	 */
4354 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4355 return true;
4356
4357 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4358
4359 if (gpa == UNMAPPED_GVA)
4360 return true;
4361
4362 if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
4363 return true;
4364
4365 return false;
4366}
4367
4368int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4369 unsigned long cr2,
4370 int emulation_type,
4371 void *insn,
4372 int insn_len)
4373{
4374 int r;
4375 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
4376
4377 kvm_clear_exception_queue(vcpu);
4378 vcpu->arch.mmio_fault_cr2 = cr2;
4379
	/*
	 * The emulator accesses vcpu->arch.regs directly, so make sure the
	 * register cache is up to date before emulation starts.
	 */
4385 cache_all_regs(vcpu);
4386
4387 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
4388 init_emulate_ctxt(vcpu);
4389 vcpu->arch.emulate_ctxt.interruptibility = 0;
4390 vcpu->arch.emulate_ctxt.have_exception = false;
4391 vcpu->arch.emulate_ctxt.perm_ok = false;
4392
4393 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
4394 if (r == X86EMUL_PROPAGATE_FAULT)
4395 goto done;
4396
4397 trace_kvm_emulate_insn_start(vcpu);
4398
		/* On a trapped #UD, only a handful of two-byte opcodes (the
		 * VMCALL/SYSENTER/SYSEXIT/SYSCALL group) may be emulated. */
4401 if (emulation_type & EMULTYPE_TRAP_UD) {
4402 if (!c->twobyte)
4403 return EMULATE_FAIL;
4404 switch (c->b) {
4405 case 0x01:
4406 if (c->modrm_mod != 3 || c->modrm_rm != 1)
4407 return EMULATE_FAIL;
4408 break;
4409 case 0x34:
4410 case 0x35:
4411 if (c->modrm_mod != 0 || c->modrm_rm != 0)
4412 return EMULATE_FAIL;
4413 break;
4414 case 0x05:
4415 if (c->modrm_mod != 0 || c->modrm_rm != 0)
4416 return EMULATE_FAIL;
4417 break;
4418 default:
4419 return EMULATE_FAIL;
4420 }
4421
4422 if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
4423 return EMULATE_FAIL;
4424 }
4425
4426 ++vcpu->stat.insn_emulation;
4427 if (r) {
4428 if (reexecute_instruction(vcpu, cr2))
4429 return EMULATE_DONE;
4430 if (emulation_type & EMULTYPE_SKIP)
4431 return EMULATE_FAIL;
4432 return handle_emulation_failure(vcpu);
4433 }
4434 }
4435
4436 if (emulation_type & EMULTYPE_SKIP) {
4437 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
4438 return EMULATE_DONE;
4439 }
4440
	/* Re-sync the emulator's register copy; I/O completion (e.g. the
	   VMware backdoor interface) may have changed register values. */
4443 memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
4444
4445restart:
4446 r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
4447
4448 if (r == EMULATION_FAILED) {
4449 if (reexecute_instruction(vcpu, cr2))
4450 return EMULATE_DONE;
4451
4452 return handle_emulation_failure(vcpu);
4453 }
4454
4455done:
4456 if (vcpu->arch.emulate_ctxt.have_exception) {
4457 inject_emulated_exception(vcpu);
4458 r = EMULATE_DONE;
4459 } else if (vcpu->arch.pio.count) {
4460 if (!vcpu->arch.pio.in)
4461 vcpu->arch.pio.count = 0;
4462 r = EMULATE_DO_MMIO;
4463 } else if (vcpu->mmio_needed) {
4464 if (vcpu->mmio_is_write)
4465 vcpu->mmio_needed = 0;
4466 r = EMULATE_DO_MMIO;
4467 } else if (r == EMULATION_RESTART)
4468 goto restart;
4469 else
4470 r = EMULATE_DONE;
4471
4472 toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility);
4473 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
4474 kvm_make_request(KVM_REQ_EVENT, vcpu);
4475 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
4476 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
4477
4478 return r;
4479}
4480EXPORT_SYMBOL_GPL(x86_emulate_instruction);
4481
4482int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
4483{
4484 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
4485 int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu);
4486
4487 vcpu->arch.pio.count = 0;
4488 return ret;
4489}
4490EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4491
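
/*
 * Per-cpu TSC frequency tracking for kvmclock: cpu_tsc_khz is zeroed when a
 * CPU is about to go offline and refreshed on cpufreq transitions or when a
 * CPU comes (back) online.
 */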
4492static void tsc_bad(void *info)
4493{
4494 __this_cpu_write(cpu_tsc_khz, 0);
4495}
4496
4497static void tsc_khz_changed(void *data)
4498{
4499 struct cpufreq_freqs *freq = data;
4500 unsigned long khz = 0;
4501
4502 if (data)
4503 khz = freq->new;
4504 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4505 khz = cpufreq_quick_get(raw_smp_processor_id());
4506 if (!khz)
4507 khz = tsc_khz;
4508 __this_cpu_write(cpu_tsc_khz, khz);
4509}
4510
4511static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
4512 void *data)
4513{
4514 struct cpufreq_freqs *freq = data;
4515 struct kvm *kvm;
4516 struct kvm_vcpu *vcpu;
4517 int i, send_ipi = 0;

	/*
	 * A guest may keep running on a slowing TSC as long as it is told
	 * about the change afterwards, and on an accelerating TSC as long
	 * as it is told beforehand; either way guest time never appears to
	 * go backwards.  So act on CPUFREQ_PRECHANGE only when the
	 * frequency is going up, and on CPUFREQ_POSTCHANGE only when it is
	 * going down.
	 */
4558 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
4559 return 0;
4560 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
4561 return 0;
4562
4563 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
4564
4565 spin_lock(&kvm_lock);
4566 list_for_each_entry(kvm, &vm_list, vm_list) {
4567 kvm_for_each_vcpu(i, vcpu, kvm) {
4568 if (vcpu->cpu != freq->cpu)
4569 continue;
4570 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4571 if (vcpu->cpu != smp_processor_id())
4572 send_ipi = 1;
4573 }
4574 }
4575 spin_unlock(&kvm_lock);
4576
4577 if (freq->old < freq->new && send_ipi) {
		/*
		 * A vCPU on the affected CPU may still be in guest context;
		 * running tsc_khz_changed() there via an IPI kicks it out of
		 * the guest, so kvmclock is refreshed before the guest can
		 * observe the higher frequency.
		 */
4590 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
4591 }
4592 return 0;
4593}
4594
4595static struct notifier_block kvmclock_cpufreq_notifier_block = {
4596 .notifier_call = kvmclock_cpufreq_notifier
4597};
4598
4599static int kvmclock_cpu_notifier(struct notifier_block *nfb,
4600 unsigned long action, void *hcpu)
4601{
4602 unsigned int cpu = (unsigned long)hcpu;
4603
4604 switch (action) {
4605 case CPU_ONLINE:
4606 case CPU_DOWN_FAILED:
4607 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
4608 break;
4609 case CPU_DOWN_PREPARE:
4610 smp_call_function_single(cpu, tsc_bad, NULL, 1);
4611 break;
4612 }
4613 return NOTIFY_OK;
4614}
4615
4616static struct notifier_block kvmclock_cpu_notifier_block = {
4617 .notifier_call = kvmclock_cpu_notifier,
4618 .priority = -INT_MAX
4619};
4620
4621static void kvm_timer_init(void)
4622{
4623 int cpu;
4624
4625 max_tsc_khz = tsc_khz;
4626 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
4627 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
4628#ifdef CONFIG_CPU_FREQ
4629 struct cpufreq_policy policy;
4630 memset(&policy, 0, sizeof(policy));
4631 cpu = get_cpu();
4632 cpufreq_get_policy(&policy, cpu);
4633 if (policy.cpuinfo.max_freq)
4634 max_tsc_khz = policy.cpuinfo.max_freq;
4635 put_cpu();
4636#endif
4637 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4638 CPUFREQ_TRANSITION_NOTIFIER);
4639 }
4640 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
4641 for_each_online_cpu(cpu)
4642 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
4643}
4644
4645static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
4646
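/*
 * current_vcpu is set around guest NMI handling (kvm_before_handle_nmi /
 * kvm_after_handle_nmi) so the perf callbacks below can attribute samples
 * taken in that window to guest context.
 */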
4647static int kvm_is_in_guest(void)
4648{
4649 return percpu_read(current_vcpu) != NULL;
4650}
4651
4652static int kvm_is_user_mode(void)
4653{
4654 int user_mode = 3;
4655
4656 if (percpu_read(current_vcpu))
4657 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
4658
4659 return user_mode != 0;
4660}
4661
4662static unsigned long kvm_get_guest_ip(void)
4663{
4664 unsigned long ip = 0;
4665
4666 if (percpu_read(current_vcpu))
4667 ip = kvm_rip_read(percpu_read(current_vcpu));
4668
4669 return ip;
4670}
4671
4672static struct perf_guest_info_callbacks kvm_guest_cbs = {
4673 .is_in_guest = kvm_is_in_guest,
4674 .is_user_mode = kvm_is_user_mode,
4675 .get_guest_ip = kvm_get_guest_ip,
4676};
4677
4678void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
4679{
4680 percpu_write(current_vcpu, vcpu);
4681}
4682EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
4683
4684void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
4685{
4686 percpu_write(current_vcpu, NULL);
4687}
4688EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
4689
4690int kvm_arch_init(void *opaque)
4691{
4692 int r;
4693 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
4694
4695 if (kvm_x86_ops) {
4696 printk(KERN_ERR "kvm: already loaded the other module\n");
4697 r = -EEXIST;
4698 goto out;
4699 }
4700
4701 if (!ops->cpu_has_kvm_support()) {
4702 printk(KERN_ERR "kvm: no hardware support\n");
4703 r = -EOPNOTSUPP;
4704 goto out;
4705 }
4706 if (ops->disabled_by_bios()) {
4707 printk(KERN_ERR "kvm: disabled by bios\n");
4708 r = -EOPNOTSUPP;
4709 goto out;
4710 }
4711
4712 r = kvm_mmu_module_init();
4713 if (r)
4714 goto out;
4715
4716 kvm_init_msr_list();
4717
4718 kvm_x86_ops = ops;
4719 kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
4720 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
4721 PT_DIRTY_MASK, PT64_NX_MASK, 0);
4722
4723 kvm_timer_init();
4724
4725 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4726
4727 if (cpu_has_xsave)
4728 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
4729
4730 return 0;
4731
4732out:
4733 return r;
4734}
4735
4736void kvm_arch_exit(void)
4737{
4738 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
4739
4740 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4741 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4742 CPUFREQ_TRANSITION_NOTIFIER);
4743 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
4744 kvm_x86_ops = NULL;
4745 kvm_mmu_module_exit();
4746}
4747
4748int kvm_emulate_halt(struct kvm_vcpu *vcpu)
4749{
4750 ++vcpu->stat.halt_exits;
4751 if (irqchip_in_kernel(vcpu->kvm)) {
4752 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
4753 return 1;
4754 } else {
4755 vcpu->run->exit_reason = KVM_EXIT_HLT;
4756 return 0;
4757 }
4758}
4759EXPORT_SYMBOL_GPL(kvm_emulate_halt);
4760
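/*
 * In long mode a hypercall passes a guest physical address in a single
 * register; 32-bit guests split it across two registers, so recombine it.
 */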
4761static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
4762 unsigned long a1)
4763{
4764 if (is_long_mode(vcpu))
4765 return a0;
4766 else
4767 return a0 | ((gpa_t)a1 << 32);
4768}
4769
4770int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
4771{
4772 u64 param, ingpa, outgpa, ret;
4773 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
4774 bool fast, longmode;
4775 int cs_db, cs_l;
4776
	/*
	 * Hypercalls issued from CPL != 0 or from real mode raise #UD, as
	 * required by the Hyper-V specification.
	 */
4781 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
4782 kvm_queue_exception(vcpu, UD_VECTOR);
4783 return 0;
4784 }
4785
4786 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4787 longmode = is_long_mode(vcpu) && cs_l == 1;
4788
4789 if (!longmode) {
4790 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
4791 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
4792 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
4793 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
4794 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
4795 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
4796 }
4797#ifdef CONFIG_X86_64
4798 else {
4799 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
4800 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
4801 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
4802 }
4803#endif
4804
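	/*
	 * Decode the hypercall input value: bits 0-15 are the call code,
	 * bit 16 the "fast" flag, bits 32-43 the rep count and bits 48-59
	 * the rep start index.
	 */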
4805 code = param & 0xffff;
4806 fast = (param >> 16) & 0x1;
4807 rep_cnt = (param >> 32) & 0xfff;
4808 rep_idx = (param >> 48) & 0xfff;
4809
4810 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
4811
4812 switch (code) {
4813 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
4814 kvm_vcpu_on_spin(vcpu);
4815 break;
4816 default:
4817 res = HV_STATUS_INVALID_HYPERCALL_CODE;
4818 break;
4819 }
4820
4821 ret = res | (((u64)rep_done & 0xfff) << 32);
4822 if (longmode) {
4823 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
4824 } else {
4825 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
4826 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
4827 }
4828
4829 return 1;
4830}
4831
4832int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
4833{
4834 unsigned long nr, a0, a1, a2, a3, ret;
4835 int r = 1;
4836
4837 if (kvm_hv_hypercall_enabled(vcpu->kvm))
4838 return kvm_hv_hypercall(vcpu);
4839
4840 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
4841 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
4842 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
4843 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
4844 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
4845
4846 trace_kvm_hypercall(nr, a0, a1, a2, a3);
4847
4848 if (!is_long_mode(vcpu)) {
4849 nr &= 0xFFFFFFFF;
4850 a0 &= 0xFFFFFFFF;
4851 a1 &= 0xFFFFFFFF;
4852 a2 &= 0xFFFFFFFF;
4853 a3 &= 0xFFFFFFFF;
4854 }
4855
4856 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
4857 ret = -KVM_EPERM;
4858 goto out;
4859 }
4860
4861 switch (nr) {
4862 case KVM_HC_VAPIC_POLL_IRQ:
4863 ret = 0;
4864 break;
4865 case KVM_HC_MMU_OP:
4866 r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
4867 break;
4868 default:
4869 ret = -KVM_ENOSYS;
4870 break;
4871 }
4872out:
4873 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
4874 ++vcpu->stat.hypercalls;
4875 return r;
4876}
4877EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
4878
4879int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
4880{
4881 char instruction[3];
4882 unsigned long rip = kvm_rip_read(vcpu);

	/*
	 * Blow away the MMU so no vCPU keeps a stale mapping of the page
	 * being patched; the new hypercall instruction then appears
	 * atomically to all vCPUs.
	 */
4889 kvm_mmu_zap_all(vcpu->kvm);
4890
4891 kvm_x86_ops->patch_hypercall(vcpu, instruction);
4892
4893 return emulator_write_emulated(rip, instruction, 3, NULL, vcpu);
4894}
4895
4896void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4897{
4898 struct desc_ptr dt = { limit, base };
4899
4900 kvm_x86_ops->set_gdt(vcpu, &dt);
4901}
4902
4903void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
4904{
4905 struct desc_ptr dt = { limit, base };
4906
4907 kvm_x86_ops->set_idt(vcpu, &dt);
4908}
4909
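/*
 * Stateful CPUID functions (CPUID leaf 2 is the classic example) return a
 * different entry on each invocation: clear the READ_NEXT flag on the entry
 * just consumed and set it on the next entry with the same function number.
 */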
4910static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
4911{
4912 struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
4913 int j, nent = vcpu->arch.cpuid_nent;
4914
4915 e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
4916
4917 for (j = i + 1; ; j = (j + 1) % nent) {
4918 struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
4919 if (ej->function == e->function) {
4920 ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
4921 return j;
4922 }
4923 }
	return 0; /* never reached; silences compiler warnings */
4925}
4926
/*
 * Return true if the entry matches the requested function and index and,
 * for stateful functions, is the one that should be read next.
 */
4929static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
4930 u32 function, u32 index)
4931{
4932 if (e->function != function)
4933 return 0;
4934 if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
4935 return 0;
4936 if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
4937 !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
4938 return 0;
4939 return 1;
4940}
4941
4942struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
4943 u32 function, u32 index)
4944{
4945 int i;
4946 struct kvm_cpuid_entry2 *best = NULL;
4947
4948 for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
4949 struct kvm_cpuid_entry2 *e;
4950
4951 e = &vcpu->arch.cpuid_entries[i];
4952 if (is_matching_cpuid_entry(e, function, index)) {
4953 if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
4954 move_to_next_stateful_cpuid_entry(vcpu, i);
4955 best = e;
4956 break;
4957 }

		/*
		 * Otherwise remember the closest entry in the same range
		 * (basic vs. extended leaves) as a fallback.
		 */
4961 if (((e->function ^ function) & 0x80000000) == 0)
4962 if (!best || e->function > best->function)
4963 best = e;
4964 }
4965 return best;
4966}
4967EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
4968
4969int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
4970{
4971 struct kvm_cpuid_entry2 *best;
4972
4973 best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
4974 if (!best || best->eax < 0x80000008)
4975 goto not_found;
4976 best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
4977 if (best)
4978 return best->eax & 0xff;
4979not_found:
4980 return 36;
4981}
4982
4983void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
4984{
4985 u32 function, index;
4986 struct kvm_cpuid_entry2 *best;
4987
4988 function = kvm_register_read(vcpu, VCPU_REGS_RAX);
4989 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
4990 kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
4991 kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
4992 kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
4993 kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
4994 best = kvm_find_cpuid_entry(vcpu, function, index);
4995 if (best) {
4996 kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
4997 kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
4998 kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
4999 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
5000 }
5001 kvm_x86_ops->skip_emulated_instruction(vcpu);
5002 trace_kvm_cpuid(function,
5003 kvm_register_read(vcpu, VCPU_REGS_RAX),
5004 kvm_register_read(vcpu, VCPU_REGS_RBX),
5005 kvm_register_read(vcpu, VCPU_REGS_RCX),
5006 kvm_register_read(vcpu, VCPU_REGS_RDX));
5007}
5008EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
5009
/*
 * Check whether userspace asked for an interrupt window and whether that
 * window is currently open; with an in-kernel irqchip, or with an interrupt
 * already queued, there is no need to exit to userspace.
 */
5016static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5017{
5018 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5019 vcpu->run->request_interrupt_window &&
5020 kvm_arch_interrupt_allowed(vcpu));
5021}
5022
5023static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5024{
5025 struct kvm_run *kvm_run = vcpu->run;
5026
5027 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5028 kvm_run->cr8 = kvm_get_cr8(vcpu);
5029 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5030 if (irqchip_in_kernel(vcpu->kvm))
5031 kvm_run->ready_for_interrupt_injection = 1;
5032 else
5033 kvm_run->ready_for_interrupt_injection =
5034 kvm_arch_interrupt_allowed(vcpu) &&
5035 !kvm_cpu_has_interrupt(vcpu) &&
5036 !kvm_event_needs_reinjection(vcpu);
5037}
5038
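/*
 * Pin the guest page that backs the virtual-APIC area so it stays mapped
 * while the vcpu runs; vapic_exit() releases it and marks it dirty.
 */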
5039static void vapic_enter(struct kvm_vcpu *vcpu)
5040{
5041 struct kvm_lapic *apic = vcpu->arch.apic;
5042 struct page *page;
5043
5044 if (!apic || !apic->vapic_addr)
5045 return;
5046
5047 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5048
5049 vcpu->arch.apic->vapic_page = page;
5050}
5051
5052static void vapic_exit(struct kvm_vcpu *vcpu)
5053{
5054 struct kvm_lapic *apic = vcpu->arch.apic;
5055 int idx;
5056
5057 if (!apic || !apic->vapic_addr)
5058 return;
5059
5060 idx = srcu_read_lock(&vcpu->kvm->srcu);
5061 kvm_release_page_dirty(apic->vapic_page);
5062 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5063 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5064}
5065
5066static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5067{
5068 int max_irr, tpr;
5069
5070 if (!kvm_x86_ops->update_cr8_intercept)
5071 return;
5072
5073 if (!vcpu->arch.apic)
5074 return;
5075
5076 if (!vcpu->arch.apic->vapic_addr)
5077 max_irr = kvm_lapic_find_highest_irr(vcpu);
5078 else
5079 max_irr = -1;
5080
5081 if (max_irr != -1)
5082 max_irr >>= 4;
5083
5084 tpr = kvm_lapic_get_cr8(vcpu);
5085
5086 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5087}
5088
5089static void inject_pending_event(struct kvm_vcpu *vcpu)
5090{
	/* try to reinject previously delivered events first */
5092 if (vcpu->arch.exception.pending) {
5093 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5094 vcpu->arch.exception.has_error_code,
5095 vcpu->arch.exception.error_code);
5096 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5097 vcpu->arch.exception.has_error_code,
5098 vcpu->arch.exception.error_code,
5099 vcpu->arch.exception.reinject);
5100 return;
5101 }
5102
5103 if (vcpu->arch.nmi_injected) {
5104 kvm_x86_ops->set_nmi(vcpu);
5105 return;
5106 }
5107
5108 if (vcpu->arch.interrupt.pending) {
5109 kvm_x86_ops->set_irq(vcpu);
5110 return;
5111 }
5112
	/* then try to inject a new event if one is pending and allowed */
5114 if (vcpu->arch.nmi_pending) {
5115 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5116 vcpu->arch.nmi_pending = false;
5117 vcpu->arch.nmi_injected = true;
5118 kvm_x86_ops->set_nmi(vcpu);
5119 }
5120 } else if (kvm_cpu_has_interrupt(vcpu)) {
5121 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5122 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5123 false);
5124 kvm_x86_ops->set_irq(vcpu);
5125 }
5126 }
5127}
5128
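/* Switch to the guest's xcr0 while it runs with CR4.OSXSAVE enabled. */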
5129static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
5130{
5131 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
5132 !vcpu->guest_xcr0_loaded) {
5133
5134 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
5135 vcpu->guest_xcr0_loaded = 1;
5136 }
5137}
5138
5139static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
5140{
5141 if (vcpu->guest_xcr0_loaded) {
5142 if (vcpu->arch.xcr0 != host_xcr0)
5143 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
5144 vcpu->guest_xcr0_loaded = 0;
5145 }
5146}
5147
5148static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5149{
5150 int r;
5151 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5152 vcpu->run->request_interrupt_window;
5153
5154 if (vcpu->requests) {
5155 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5156 kvm_mmu_unload(vcpu);
5157 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5158 __kvm_migrate_timers(vcpu);
5159 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5160 r = kvm_guest_time_update(vcpu);
5161 if (unlikely(r))
5162 goto out;
5163 }
5164 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5165 kvm_mmu_sync_roots(vcpu);
5166 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5167 kvm_x86_ops->tlb_flush(vcpu);
5168 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5169 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5170 r = 0;
5171 goto out;
5172 }
5173 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5174 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5175 r = 0;
5176 goto out;
5177 }
5178 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5179 vcpu->fpu_active = 0;
5180 kvm_x86_ops->fpu_deactivate(vcpu);
5181 }
5182 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* The page is swapped out; do a synthetic halt. */
5184 vcpu->arch.apf.halted = true;
5185 r = 1;
5186 goto out;
5187 }
5188 }
5189
5190 r = kvm_mmu_reload(vcpu);
5191 if (unlikely(r))
5192 goto out;
5193
5194 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5195 inject_pending_event(vcpu);

		/* enable NMI/IRQ window open exits if needed */
5198 if (vcpu->arch.nmi_pending)
5199 kvm_x86_ops->enable_nmi_window(vcpu);
5200 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
5201 kvm_x86_ops->enable_irq_window(vcpu);
5202
5203 if (kvm_lapic_enabled(vcpu)) {
5204 update_cr8_intercept(vcpu);
5205 kvm_lapic_sync_to_vapic(vcpu);
5206 }
5207 }
5208
5209 preempt_disable();
5210
5211 kvm_x86_ops->prepare_guest_switch(vcpu);
5212 if (vcpu->fpu_active)
5213 kvm_load_guest_fpu(vcpu);
5214 kvm_load_guest_xcr0(vcpu);
5215
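	/*
	 * Mark the vcpu as being in guest mode before the final request and
	 * signal checks below; kvm_vcpu_kick() tests this flag to decide
	 * whether an IPI is needed to knock the vcpu out of the guest.
	 */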
5216 atomic_set(&vcpu->guest_mode, 1);
5217 smp_wmb();
5218
5219 local_irq_disable();
5220
5221 if (!atomic_read(&vcpu->guest_mode) || vcpu->requests
5222 || need_resched() || signal_pending(current)) {
5223 atomic_set(&vcpu->guest_mode, 0);
5224 smp_wmb();
5225 local_irq_enable();
5226 preempt_enable();
5227 kvm_x86_ops->cancel_injection(vcpu);
5228 r = 1;
5229 goto out;
5230 }
5231
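	/*
	 * Drop the srcu read lock across guest execution so that memslot
	 * updates are not blocked while the vcpu is inside the guest.
	 */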
5232 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5233
5234 kvm_guest_enter();
5235
5236 if (unlikely(vcpu->arch.switch_db_regs)) {
5237 set_debugreg(0, 7);
5238 set_debugreg(vcpu->arch.eff_db[0], 0);
5239 set_debugreg(vcpu->arch.eff_db[1], 1);
5240 set_debugreg(vcpu->arch.eff_db[2], 2);
5241 set_debugreg(vcpu->arch.eff_db[3], 3);
5242 }
5243
5244 trace_kvm_entry(vcpu->vcpu_id);
5245 kvm_x86_ops->run(vcpu);

	/*
	 * The guest may have clobbered the debug registers; if the host has
	 * active hardware breakpoints, restore its state, otherwise the
	 * stale values do not matter.
	 */
5254 if (hw_breakpoint_active())
5255 hw_breakpoint_restore();
5256
5257 kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
5258
5259 atomic_set(&vcpu->guest_mode, 0);
5260 smp_wmb();
5261 local_irq_enable();
5262
5263 ++vcpu->stat.exits;

	/*
	 * There must be an instruction between local_irq_enable() and
	 * kvm_guest_exit() so that a pending timer interrupt is not delayed
	 * by the interrupt shadow; the stat.exits increment above serves
	 * that purpose, and this barrier() keeps it from being reordered.
	 */
5271 barrier();
5272
5273 kvm_guest_exit();
5274
5275 preempt_enable();
5276
5277 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	/*
	 * Profile KVM exit RIPs:
	 */
5282 if (unlikely(prof_on == KVM_PROFILING)) {
5283 unsigned long rip = kvm_rip_read(vcpu);
5284 profile_hit(KVM_PROFILING, (void *)rip);
5285 }
5286
5287
5288 kvm_lapic_sync_from_vapic(vcpu);
5289
5290 r = kvm_x86_ops->handle_exit(vcpu);
5291out:
5292 return r;
5293}
5294
5295
5296static int __vcpu_run(struct kvm_vcpu *vcpu)
5297{
5298 int r;
5299 struct kvm *kvm = vcpu->kvm;
5300
5301 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
5302 pr_debug("vcpu %d received sipi with vector # %x\n",
5303 vcpu->vcpu_id, vcpu->arch.sipi_vector);
5304 kvm_lapic_reset(vcpu);
5305 r = kvm_arch_vcpu_reset(vcpu);
5306 if (r)
5307 return r;
5308 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5309 }
5310
5311 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5312 vapic_enter(vcpu);
5313
5314 r = 1;
5315 while (r > 0) {
5316 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
5317 !vcpu->arch.apf.halted)
5318 r = vcpu_enter_guest(vcpu);
5319 else {
5320 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5321 kvm_vcpu_block(vcpu);
5322 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
			if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
				switch (vcpu->arch.mp_state) {
				case KVM_MP_STATE_HALTED:
					vcpu->arch.mp_state =
						KVM_MP_STATE_RUNNABLE;
					/* fall through */
5329 case KVM_MP_STATE_RUNNABLE:
5330 vcpu->arch.apf.halted = false;
5331 break;
5332 case KVM_MP_STATE_SIPI_RECEIVED:
5333 default:
5334 r = -EINTR;
5335 break;
5336 }
5337 }
5338 }
5339
5340 if (r <= 0)
5341 break;
5342
5343 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
5344 if (kvm_cpu_has_pending_timer(vcpu))
5345 kvm_inject_pending_timer_irqs(vcpu);
5346
5347 if (dm_request_for_irq_injection(vcpu)) {
5348 r = -EINTR;
5349 vcpu->run->exit_reason = KVM_EXIT_INTR;
5350 ++vcpu->stat.request_irq_exits;
5351 }
5352
5353 kvm_check_async_pf_completion(vcpu);
5354
5355 if (signal_pending(current)) {
5356 r = -EINTR;
5357 vcpu->run->exit_reason = KVM_EXIT_INTR;
5358 ++vcpu->stat.signal_exits;
5359 }
5360 if (need_resched()) {
5361 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5362 kvm_resched(vcpu);
5363 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5364 }
5365 }
5366
5367 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5368
5369 vapic_exit(vcpu);
5370
5371 return r;
5372}
5373
5374int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5375{
5376 int r;
5377 sigset_t sigsaved;
5378
5379 if (!tsk_used_math(current) && init_fpu(current))
5380 return -ENOMEM;
5381
5382 if (vcpu->sigset_active)
5383 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
5384
5385 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
5386 kvm_vcpu_block(vcpu);
5387 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
5388 r = -EAGAIN;
5389 goto out;
5390 }
5391
	/* With a userspace irqchip, re-sync the APIC TPR (CR8) from kvm_run. */
5393 if (!irqchip_in_kernel(vcpu->kvm)) {
5394 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
5395 r = -EINVAL;
5396 goto out;
5397 }
5398 }
5399
5400 if (vcpu->arch.pio.count || vcpu->mmio_needed) {
5401 if (vcpu->mmio_needed) {
5402 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
5403 vcpu->mmio_read_completed = 1;
5404 vcpu->mmio_needed = 0;
5405 }
5406 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5407 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5408 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5409 if (r != EMULATE_DONE) {
5410 r = 0;
5411 goto out;
5412 }
5413 }
5414 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
5415 kvm_register_write(vcpu, VCPU_REGS_RAX,
5416 kvm_run->hypercall.ret);
5417
5418 r = __vcpu_run(vcpu);
5419
5420out:
5421 post_kvm_run_save(vcpu);
5422 if (vcpu->sigset_active)
5423 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
5424
5425 return r;
5426}
5427
5428int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5429{
5430 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
5431 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
5432 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
5433 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
5434 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
5435 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
5436 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
5437 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
5438#ifdef CONFIG_X86_64
5439 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
5440 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
5441 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
5442 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
5443 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
5444 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
5445 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
5446 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
5447#endif
5448
5449 regs->rip = kvm_rip_read(vcpu);
5450 regs->rflags = kvm_get_rflags(vcpu);
5451
5452 return 0;
5453}
5454
5455int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5456{
5457 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
5458 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
5459 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
5460 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
5461 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
5462 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
5463 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
5464 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
5465#ifdef CONFIG_X86_64
5466 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
5467 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
5468 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
5469 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
5470 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
5471 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
5472 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
5473 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
5474#endif
5475
5476 kvm_rip_write(vcpu, regs->rip);
5477 kvm_set_rflags(vcpu, regs->rflags);
5478
5479 vcpu->arch.exception.pending = false;
5480
5481 kvm_make_request(KVM_REQ_EVENT, vcpu);
5482
5483 return 0;
5484}
5485
5486void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
5487{
5488 struct kvm_segment cs;
5489
5490 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
5491 *db = cs.db;
5492 *l = cs.l;
5493}
5494EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
5495
5496int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
5497 struct kvm_sregs *sregs)
5498{
5499 struct desc_ptr dt;
5500
5501 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5502 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5503 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5504 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5505 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5506 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5507
5508 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5509 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5510
5511 kvm_x86_ops->get_idt(vcpu, &dt);
5512 sregs->idt.limit = dt.size;
5513 sregs->idt.base = dt.address;
5514 kvm_x86_ops->get_gdt(vcpu, &dt);
5515 sregs->gdt.limit = dt.size;
5516 sregs->gdt.base = dt.address;
5517
5518 sregs->cr0 = kvm_read_cr0(vcpu);
5519 sregs->cr2 = vcpu->arch.cr2;
5520 sregs->cr3 = kvm_read_cr3(vcpu);
5521 sregs->cr4 = kvm_read_cr4(vcpu);
5522 sregs->cr8 = kvm_get_cr8(vcpu);
5523 sregs->efer = vcpu->arch.efer;
5524 sregs->apic_base = kvm_get_apic_base(vcpu);
5525
5526 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
5527
5528 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
5529 set_bit(vcpu->arch.interrupt.nr,
5530 (unsigned long *)sregs->interrupt_bitmap);
5531
5532 return 0;
5533}
5534
5535int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
5536 struct kvm_mp_state *mp_state)
5537{
5538 mp_state->mp_state = vcpu->arch.mp_state;
5539 return 0;
5540}
5541
5542int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
5543 struct kvm_mp_state *mp_state)
5544{
5545 vcpu->arch.mp_state = mp_state->mp_state;
5546 kvm_make_request(KVM_REQ_EVENT, vcpu);
5547 return 0;
5548}
5549
5550int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
5551 bool has_error_code, u32 error_code)
5552{
5553 struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
5554 int ret;
5555
5556 init_emulate_ctxt(vcpu);
5557
5558 ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
5559 tss_selector, reason, has_error_code,
5560 error_code);
5561
5562 if (ret)
5563 return EMULATE_FAIL;
5564
5565 memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
5566 kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
5567 kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
5568 kvm_make_request(KVM_REQ_EVENT, vcpu);
5569 return EMULATE_DONE;
5570}
5571EXPORT_SYMBOL_GPL(kvm_task_switch);
5572
5573int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5574 struct kvm_sregs *sregs)
5575{
5576 int mmu_reset_needed = 0;
5577 int pending_vec, max_bits;
5578 struct desc_ptr dt;
5579
5580 dt.size = sregs->idt.limit;
5581 dt.address = sregs->idt.base;
5582 kvm_x86_ops->set_idt(vcpu, &dt);
5583 dt.size = sregs->gdt.limit;
5584 dt.address = sregs->gdt.base;
5585 kvm_x86_ops->set_gdt(vcpu, &dt);
5586
5587 vcpu->arch.cr2 = sregs->cr2;
5588 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
5589 vcpu->arch.cr3 = sregs->cr3;
5590 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
5591
5592 kvm_set_cr8(vcpu, sregs->cr8);
5593
5594 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
5595 kvm_x86_ops->set_efer(vcpu, sregs->efer);
5596 kvm_set_apic_base(vcpu, sregs->apic_base);
5597
5598 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
5599 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
5600 vcpu->arch.cr0 = sregs->cr0;
5601
5602 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5603 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5604 if (sregs->cr4 & X86_CR4_OSXSAVE)
5605 update_cpuid(vcpu);
5606 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5607 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
5608 mmu_reset_needed = 1;
5609 }
5610
5611 if (mmu_reset_needed)
5612 kvm_mmu_reset_context(vcpu);
5613
5614 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
5615 pending_vec = find_first_bit(
5616 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
5617 if (pending_vec < max_bits) {
5618 kvm_queue_interrupt(vcpu, pending_vec, false);
5619 pr_debug("Set back pending irq %d\n", pending_vec);
5620 if (irqchip_in_kernel(vcpu->kvm))
5621 kvm_pic_clear_isr_ack(vcpu->kvm);
5622 }
5623
5624 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
5625 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
5626 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
5627 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
5628 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
5629 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
5630
5631 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
5632 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
5633
5634 update_cr8_intercept(vcpu);
5635
	/* Older userspace won't unhalt the vcpu on reset. */
5637 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
5638 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
5639 !is_protmode(vcpu))
5640 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5641
5642 kvm_make_request(KVM_REQ_EVENT, vcpu);
5643
5644 return 0;
5645}
5646
5647int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
5648 struct kvm_guest_debug *dbg)
5649{
5650 unsigned long rflags;
5651 int i, r;
5652
5653 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
5654 r = -EBUSY;
5655 if (vcpu->arch.exception.pending)
5656 goto out;
5657 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
5658 kvm_queue_exception(vcpu, DB_VECTOR);
5659 else
5660 kvm_queue_exception(vcpu, BP_VECTOR);
5661 }
5662

	/*
	 * Read rflags as long as potentially injected trace flags are
	 * still filtered out.
	 */
5667 rflags = kvm_get_rflags(vcpu);
5668
5669 vcpu->guest_debug = dbg->control;
5670 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
5671 vcpu->guest_debug = 0;
5672
5673 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5674 for (i = 0; i < KVM_NR_DB_REGS; ++i)
5675 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
5676 vcpu->arch.switch_db_regs =
5677 (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
5678 } else {
5679 for (i = 0; i < KVM_NR_DB_REGS; i++)
5680 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
5681 vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
5682 }
5683
5684 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
5685 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
5686 get_segment_base(vcpu, VCPU_SREG_CS);
5687

	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags as appropriate.
	 */
5692 kvm_set_rflags(vcpu, rflags);
5693
5694 kvm_x86_ops->set_guest_debug(vcpu, dbg);
5695
5696 r = 0;
5697
5698out:
5699
5700 return r;
5701}
5702
/*
 * Translate a guest virtual address to a guest physical address.
 */
5706int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
5707 struct kvm_translation *tr)
5708{
5709 unsigned long vaddr = tr->linear_address;
5710 gpa_t gpa;
5711 int idx;
5712
5713 idx = srcu_read_lock(&vcpu->kvm->srcu);
5714 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
5715 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5716 tr->physical_address = gpa;
5717 tr->valid = gpa != UNMAPPED_GVA;
5718 tr->writeable = 1;
5719 tr->usermode = 0;
5720
5721 return 0;
5722}
5723
5724int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5725{
5726 struct i387_fxsave_struct *fxsave =
5727 &vcpu->arch.guest_fpu.state->fxsave;
5728
5729 memcpy(fpu->fpr, fxsave->st_space, 128);
5730 fpu->fcw = fxsave->cwd;
5731 fpu->fsw = fxsave->swd;
5732 fpu->ftwx = fxsave->twd;
5733 fpu->last_opcode = fxsave->fop;
5734 fpu->last_ip = fxsave->rip;
5735 fpu->last_dp = fxsave->rdp;
5736 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
5737
5738 return 0;
5739}
5740
5741int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
5742{
5743 struct i387_fxsave_struct *fxsave =
5744 &vcpu->arch.guest_fpu.state->fxsave;
5745
5746 memcpy(fxsave->st_space, fpu->fpr, 128);
5747 fxsave->cwd = fpu->fcw;
5748 fxsave->swd = fpu->fsw;
5749 fxsave->twd = fpu->ftwx;
5750 fxsave->fop = fpu->last_opcode;
5751 fxsave->rip = fpu->last_ip;
5752 fxsave->rdp = fpu->last_dp;
5753 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
5754
5755 return 0;
5756}
5757
5758int fx_init(struct kvm_vcpu *vcpu)
5759{
5760 int err;
5761
5762 err = fpu_alloc(&vcpu->arch.guest_fpu);
5763 if (err)
5764 return err;
5765
5766 fpu_finit(&vcpu->arch.guest_fpu);
5767
	/*
	 * Start the guest with only x87 state enabled in xcr0, which is the
	 * architectural minimum.
	 */
5771 vcpu->arch.xcr0 = XSTATE_FP;
5772
5773 vcpu->arch.cr0 |= X86_CR0_ET;
5774
5775 return 0;
5776}
5777EXPORT_SYMBOL_GPL(fx_init);
5778
5779static void fx_free(struct kvm_vcpu *vcpu)
5780{
5781 fpu_free(&vcpu->arch.guest_fpu);
5782}
5783
5784void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
5785{
5786 if (vcpu->guest_fpu_loaded)
5787 return;
5788
	/*
	 * Switch back to the host xcr0 before saving the host FPU state,
	 * then load the guest FPU image.
	 */
5794 kvm_put_guest_xcr0(vcpu);
5795 vcpu->guest_fpu_loaded = 1;
5796 unlazy_fpu(current);
5797 fpu_restore_checking(&vcpu->arch.guest_fpu);
5798 trace_kvm_fpu(1);
5799}
5800
5801void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
5802{
5803 kvm_put_guest_xcr0(vcpu);
5804
5805 if (!vcpu->guest_fpu_loaded)
5806 return;
5807
5808 vcpu->guest_fpu_loaded = 0;
5809 fpu_save_init(&vcpu->arch.guest_fpu);
5810 ++vcpu->stat.fpu_reload;
5811 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
5812 trace_kvm_fpu(0);
5813}
5814
5815void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
5816{
5817 if (vcpu->arch.time_page) {
5818 kvm_release_page_dirty(vcpu->arch.time_page);
5819 vcpu->arch.time_page = NULL;
5820 }
5821
5822 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
5823 fx_free(vcpu);
5824 kvm_x86_ops->vcpu_free(vcpu);
5825}
5826
5827struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
5828 unsigned int id)
5829{
5830 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
5831 printk_once(KERN_WARNING
5832 "kvm: SMP vm created on host with unstable TSC; "
5833 "guest TSC will not be reliable\n");
5834 return kvm_x86_ops->vcpu_create(kvm, id);
5835}
5836
5837int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
5838{
5839 int r;
5840
5841 vcpu->arch.mtrr_state.have_fixed = 1;
5842 vcpu_load(vcpu);
5843 r = kvm_arch_vcpu_reset(vcpu);
5844 if (r == 0)
5845 r = kvm_mmu_setup(vcpu);
5846 vcpu_put(vcpu);
5847 if (r < 0)
5848 goto free_vcpu;
5849
5850 return 0;
5851free_vcpu:
5852 kvm_x86_ops->vcpu_free(vcpu);
5853 return r;
5854}
5855
5856void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
5857{
5858 vcpu->arch.apf.msr_val = 0;
5859
5860 vcpu_load(vcpu);
5861 kvm_mmu_unload(vcpu);
5862 vcpu_put(vcpu);
5863
5864 fx_free(vcpu);
5865 kvm_x86_ops->vcpu_free(vcpu);
5866}
5867
5868int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
5869{
5870 vcpu->arch.nmi_pending = false;
5871 vcpu->arch.nmi_injected = false;
5872
5873 vcpu->arch.switch_db_regs = 0;
5874 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
5875 vcpu->arch.dr6 = DR6_FIXED_1;
5876 vcpu->arch.dr7 = DR7_FIXED_1;
5877
5878 kvm_make_request(KVM_REQ_EVENT, vcpu);
5879 vcpu->arch.apf.msr_val = 0;
5880
5881 kvm_clear_async_pf_completion_queue(vcpu);
5882 kvm_async_pf_hash_reset(vcpu);
5883 vcpu->arch.apf.halted = false;
5884
5885 return kvm_x86_ops->vcpu_reset(vcpu);
5886}
5887
5888int kvm_arch_hardware_enable(void *garbage)
5889{
5890 struct kvm *kvm;
5891 struct kvm_vcpu *vcpu;
5892 int i;
5893
5894 kvm_shared_msr_cpu_online();
5895 list_for_each_entry(kvm, &vm_list, vm_list)
5896 kvm_for_each_vcpu(i, vcpu, kvm)
5897 if (vcpu->cpu == smp_processor_id())
5898 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5899 return kvm_x86_ops->hardware_enable(garbage);
5900}
5901
5902void kvm_arch_hardware_disable(void *garbage)
5903{
5904 kvm_x86_ops->hardware_disable(garbage);
5905 drop_user_return_notifiers(garbage);
5906}
5907
5908int kvm_arch_hardware_setup(void)
5909{
5910 return kvm_x86_ops->hardware_setup();
5911}
5912
5913void kvm_arch_hardware_unsetup(void)
5914{
5915 kvm_x86_ops->hardware_unsetup();
5916}
5917
5918void kvm_arch_check_processor_compat(void *rtn)
5919{
5920 kvm_x86_ops->check_processor_compatibility(rtn);
5921}
5922
5923int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5924{
5925 struct page *page;
5926 struct kvm *kvm;
5927 int r;
5928
5929 BUG_ON(vcpu->kvm == NULL);
5930 kvm = vcpu->kvm;
5931
5932 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
5933 vcpu->arch.walk_mmu = &vcpu->arch.mmu;
5934 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
5935 vcpu->arch.mmu.translate_gpa = translate_gpa;
5936 vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
5937 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
5938 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5939 else
5940 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
5941
5942 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
5943 if (!page) {
5944 r = -ENOMEM;
5945 goto fail;
5946 }
5947 vcpu->arch.pio_data = page_address(page);
5948
5949 if (!kvm->arch.virtual_tsc_khz)
5950 kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
5951
5952 r = kvm_mmu_create(vcpu);
5953 if (r < 0)
5954 goto fail_free_pio_data;
5955
5956 if (irqchip_in_kernel(kvm)) {
5957 r = kvm_create_lapic(vcpu);
5958 if (r < 0)
5959 goto fail_mmu_destroy;
5960 }
5961
5962 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
5963 GFP_KERNEL);
5964 if (!vcpu->arch.mce_banks) {
5965 r = -ENOMEM;
5966 goto fail_free_lapic;
5967 }
5968 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
5969
5970 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
5971 goto fail_free_mce_banks;
5972
5973 kvm_async_pf_hash_reset(vcpu);
5974
5975 return 0;
5976fail_free_mce_banks:
5977 kfree(vcpu->arch.mce_banks);
5978fail_free_lapic:
5979 kvm_free_lapic(vcpu);
5980fail_mmu_destroy:
5981 kvm_mmu_destroy(vcpu);
5982fail_free_pio_data:
5983 free_page((unsigned long)vcpu->arch.pio_data);
5984fail:
5985 return r;
5986}
5987
5988void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5989{
5990 int idx;
5991
5992 kfree(vcpu->arch.mce_banks);
5993 kvm_free_lapic(vcpu);
5994 idx = srcu_read_lock(&vcpu->kvm->srcu);
5995 kvm_mmu_destroy(vcpu);
5996 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5997 free_page((unsigned long)vcpu->arch.pio_data);
5998}
5999
6000int kvm_arch_init_vm(struct kvm *kvm)
6001{
6002 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6003 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);

	/* Reserve bit 0 of irq_sources_bitmap for the userspace irq source. */
6006 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
6007
6008 spin_lock_init(&kvm->arch.tsc_write_lock);
6009
6010 return 0;
6011}
6012
6013static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
6014{
6015 vcpu_load(vcpu);
6016 kvm_mmu_unload(vcpu);
6017 vcpu_put(vcpu);
6018}
6019
6020static void kvm_free_vcpus(struct kvm *kvm)
6021{
6022 unsigned int i;
6023 struct kvm_vcpu *vcpu;
6024
	/*
	 * Unload every vcpu's mmu (and flush its async page faults) before
	 * freeing the vcpus themselves.
	 */
6028 kvm_for_each_vcpu(i, vcpu, kvm) {
6029 kvm_clear_async_pf_completion_queue(vcpu);
6030 kvm_unload_vcpu_mmu(vcpu);
6031 }
6032 kvm_for_each_vcpu(i, vcpu, kvm)
6033 kvm_arch_vcpu_free(vcpu);
6034
6035 mutex_lock(&kvm->lock);
6036 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
6037 kvm->vcpus[i] = NULL;
6038
6039 atomic_set(&kvm->online_vcpus, 0);
6040 mutex_unlock(&kvm->lock);
6041}
6042
6043void kvm_arch_sync_events(struct kvm *kvm)
6044{
6045 kvm_free_all_assigned_devices(kvm);
6046 kvm_free_pit(kvm);
6047}
6048
6049void kvm_arch_destroy_vm(struct kvm *kvm)
6050{
6051 kvm_iommu_unmap_guest(kvm);
6052 kfree(kvm->arch.vpic);
6053 kfree(kvm->arch.vioapic);
6054 kvm_free_vcpus(kvm);
6055 if (kvm->arch.apic_access_page)
6056 put_page(kvm->arch.apic_access_page);
6057 if (kvm->arch.ept_identity_pagetable)
6058 put_page(kvm->arch.ept_identity_pagetable);
6059}
6060
6061int kvm_arch_prepare_memory_region(struct kvm *kvm,
6062 struct kvm_memory_slot *memslot,
6063 struct kvm_memory_slot old,
6064 struct kvm_userspace_memory_region *mem,
6065 int user_alloc)
6066{
6067 int npages = memslot->npages;
6068 int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;

	/* Map internal slots MAP_SHARED so fork()/COW cannot move their pages. */
6071 if (memslot->id >= KVM_MEMORY_SLOTS)
6072 map_flags = MAP_SHARED | MAP_ANONYMOUS;

	/*
	 * To stay compatible with old userspace, x86 still handles the
	 * !user_alloc case by mmap()ing anonymous memory itself.
	 */
6077 if (!user_alloc) {
6078 if (npages && !old.rmap) {
6079 unsigned long userspace_addr;
6080
			down_write(&current->mm->mmap_sem);
6082 userspace_addr = do_mmap(NULL, 0,
6083 npages * PAGE_SIZE,
6084 PROT_READ | PROT_WRITE,
6085 map_flags,
6086 0);
			up_write(&current->mm->mmap_sem);
6088
6089 if (IS_ERR((void *)userspace_addr))
6090 return PTR_ERR((void *)userspace_addr);
6091
6092 memslot->userspace_addr = userspace_addr;
6093 }
6094 }
6095
6096
6097 return 0;
6098}
6099
6100void kvm_arch_commit_memory_region(struct kvm *kvm,
6101 struct kvm_userspace_memory_region *mem,
6102 struct kvm_memory_slot old,
6103 int user_alloc)
6104{
6105
6106 int npages = mem->memory_size >> PAGE_SHIFT;
6107
6108 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
6109 int ret;
6110
		down_write(&current->mm->mmap_sem);
6112 ret = do_munmap(current->mm, old.userspace_addr,
6113 old.npages * PAGE_SIZE);
		up_write(&current->mm->mmap_sem);
6115 if (ret < 0)
6116 printk(KERN_WARNING
6117 "kvm_vm_ioctl_set_memory_region: "
6118 "failed to munmap memory\n");
6119 }
6120
6121 spin_lock(&kvm->mmu_lock);
6122 if (!kvm->arch.n_requested_mmu_pages) {
6123 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6124 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6125 }
6126
6127 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
6128 spin_unlock(&kvm->mmu_lock);
6129}
6130
6131void kvm_arch_flush_shadow(struct kvm *kvm)
6132{
6133 kvm_mmu_zap_all(kvm);
6134 kvm_reload_remote_mmus(kvm);
6135}
6136
6137int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6138{
6139 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6140 !vcpu->arch.apf.halted)
6141 || !list_empty_careful(&vcpu->async_pf.done)
6142 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6143 || vcpu->arch.nmi_pending ||
6144 (kvm_arch_interrupt_allowed(vcpu) &&
6145 kvm_cpu_has_interrupt(vcpu));
6146}
6147
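/*
 * Wake the vcpu if it is blocked in kvm_vcpu_block(), and send an IPI to
 * kick it out of guest mode if it is currently running on another CPU.
 */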
6148void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
6149{
6150 int me;
6151 int cpu = vcpu->cpu;
6152
6153 if (waitqueue_active(&vcpu->wq)) {
6154 wake_up_interruptible(&vcpu->wq);
6155 ++vcpu->stat.halt_wakeup;
6156 }
6157
6158 me = get_cpu();
6159 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
6160 if (atomic_xchg(&vcpu->guest_mode, 0))
6161 smp_send_reschedule(cpu);
6162 put_cpu();
6163}
6164
6165int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
6166{
6167 return kvm_x86_ops->interrupt_allowed(vcpu);
6168}
6169
6170bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
6171{
6172 unsigned long current_rip = kvm_rip_read(vcpu) +
6173 get_segment_base(vcpu, VCPU_SREG_CS);
6174
6175 return current_rip == linear_rip;
6176}
6177EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
6178
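/*
 * The TF flag injected for KVM_GUESTDBG_SINGLESTEP is filtered out of the
 * value returned here and re-applied by kvm_set_rflags() when appropriate.
 */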
6179unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
6180{
6181 unsigned long rflags;
6182
6183 rflags = kvm_x86_ops->get_rflags(vcpu);
6184 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6185 rflags &= ~X86_EFLAGS_TF;
6186 return rflags;
6187}
6188EXPORT_SYMBOL_GPL(kvm_get_rflags);
6189
6190void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
6191{
6192 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
6193 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
6194 rflags |= X86_EFLAGS_TF;
6195 kvm_x86_ops->set_rflags(vcpu, rflags);
6196 kvm_make_request(KVM_REQ_EVENT, vcpu);
6197}
6198EXPORT_SYMBOL_GPL(kvm_set_rflags);
6199
6200void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
6201{
6202 int r;
6203
6204 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
6205 is_error_page(work->page))
6206 return;
6207
6208 r = kvm_mmu_reload(vcpu);
6209 if (unlikely(r))
6210 return;
6211
6212 if (!vcpu->arch.mmu.direct_map &&
6213 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
6214 return;
6215
6216 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
6217}
6218
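/*
 * Async page fault token tracking: vcpu->arch.apf.gfns is a small
 * open-addressed hash table of guest frame numbers with outstanding async
 * page faults, using linear probing; ~0 marks an empty slot.
 */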
6219static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
6220{
6221 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
6222}
6223
6224static inline u32 kvm_async_pf_next_probe(u32 key)
6225{
6226 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
6227}
6228
6229static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6230{
6231 u32 key = kvm_async_pf_hash_fn(gfn);
6232
6233 while (vcpu->arch.apf.gfns[key] != ~0)
6234 key = kvm_async_pf_next_probe(key);
6235
6236 vcpu->arch.apf.gfns[key] = gfn;
6237}
6238
6239static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
6240{
6241 int i;
6242 u32 key = kvm_async_pf_hash_fn(gfn);
6243
6244 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
6245 (vcpu->arch.apf.gfns[key] != gfn &&
6246 vcpu->arch.apf.gfns[key] != ~0); i++)
6247 key = kvm_async_pf_next_probe(key);
6248
6249 return key;
6250}
6251
6252bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6253{
6254 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
6255}
6256
6257static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6258{
6259 u32 i, j, k;
6260
6261 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
6262 while (true) {
6263 vcpu->arch.apf.gfns[i] = ~0;
6264 do {
6265 j = kvm_async_pf_next_probe(j);
6266 if (vcpu->arch.apf.gfns[j] == ~0)
6267 return;
6268 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * Keep scanning while the home slot k of gfns[j] lies
			 * cyclically in (i, j]; once it does not, gfns[j] is
			 * moved back into the vacated slot i below.
			 */
6274 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
6275 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
6276 i = j;
6277 }
6278}
6279
6280static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
6281{
6282
6283 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
6284 sizeof(val));
6285}
6286
6287void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
6288 struct kvm_async_pf *work)
6289{
6290 struct x86_exception fault;
6291
6292 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
6293 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
6294
6295 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
6296 (vcpu->arch.apf.send_user_only &&
6297 kvm_x86_ops->get_cpl(vcpu) == 0))
6298 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
6299 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
6300 fault.vector = PF_VECTOR;
6301 fault.error_code_valid = true;
6302 fault.error_code = 0;
6303 fault.nested_page_fault = false;
6304 fault.address = work->arch.token;
6305 kvm_inject_page_fault(vcpu, &fault);
6306 }
6307}
6308
6309void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
6310 struct kvm_async_pf *work)
6311{
6312 struct x86_exception fault;
6313
6314 trace_kvm_async_pf_ready(work->arch.token, work->gva);
6315 if (is_error_page(work->page))
6316 work->arch.token = ~0;
6317 else
6318 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
6319
6320 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
6321 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
6322 fault.vector = PF_VECTOR;
6323 fault.error_code_valid = true;
6324 fault.error_code = 0;
6325 fault.nested_page_fault = false;
6326 fault.address = work->arch.token;
6327 kvm_inject_page_fault(vcpu, &fault);
6328 }
6329 vcpu->arch.apf.halted = false;
6330}
6331
6332bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
6333{
6334 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
6335 return true;
6336 else
6337 return !kvm_event_needs_reinjection(vcpu) &&
6338 kvm_x86_ops->interrupt_allowed(vcpu);
6339}
6340
6341EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
6342EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
6343EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
6344EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
6345EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
6346EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
6347EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
6348EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
6349EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
6350EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
6351EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
6352EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
6353