/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/* EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

static bool ignore_msrs = false;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);

/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

static bool backwards_tsc_observed = false;

#define KVM_NR_SHARED_MSRS 16

struct kvm_shared_msrs_global {
	int nr;
	u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_shared_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "efer_reload", VCPU_STAT(efer_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ NULL }
};

u64 __read_mostly host_xcr0;

static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
	int i;
	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
		vcpu->arch.apf.gfns[i] = ~0;
}

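/*
 * Restore the host values of any shared MSRs that were switched to guest
 * values while in guest context.  This runs from the user-return notifier,
 * i.e. right before the CPU returns to userspace, so usermode never sees
 * the guest MSR values.
 */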
static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
	struct kvm_shared_msrs *locals
		= container_of(urn, struct kvm_shared_msrs, urn);
	struct kvm_shared_msr_values *values;

	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
		values = &locals->values[slot];
		if (values->host != values->curr) {
			wrmsrl(shared_msrs_global.msrs[slot], values->host);
			values->curr = values->host;
		}
	}
	locals->registered = false;
	user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
{
	u64 value;
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	/* only read; nobody should modify it at this time,
	 * so no lock is needed */
	if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!");
		return;
	}
	rdmsrl_safe(msr, &value);
	smsr->values[slot].host = value;
	smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
	shared_msrs_global.msrs[slot] = msr;
	/* make sure the update to shared_msrs_global is visible before use */
	smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
	unsigned i;

	for (i = 0; i < shared_msrs_global.nr; ++i)
		shared_msr_update(i, shared_msrs_global.msrs[i]);
}

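/*
 * Switch a shared MSR to a guest value on this CPU and make sure the host
 * value is restored via the user-return notifier before we go back to
 * userspace.  Writes that leave the (masked) value unchanged are skipped.
 */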
void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (((value ^ smsr->values[slot].curr) & mask) == 0)
		return;
	smsr->values[slot].curr = value;
	wrmsrl(shared_msrs_global.msrs[slot], value);
	if (!smsr->registered) {
		smsr->urn.on_user_return = kvm_on_user_return;
		user_return_notifier_register(&smsr->urn);
		smsr->registered = true;
	}
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void *ignore)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	u64 old_state = vcpu->arch.apic_base &
		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
	u64 new_state = msr_info->data &
		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
		0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);

	if (!msr_info->host_initiated &&
	    ((msr_info->data & reserved_bits) != 0 ||
	     new_state == X2APIC_ENABLE ||
	     (new_state == MSR_IA32_APICBASE_ENABLE &&
	      old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
	     (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
	      old_state == 0)))
		return 1;

	kvm_lapic_set_base(vcpu, msr_info->data);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

asmlinkage __visible void kvm_spurious_fault(void)
{
	/* Fault while not rebooting.  We want the trace. */
	BUG();
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);

#define EXCPT_BENIGN		0
#define EXCPT_CONTRIBUTORY	1
#define EXCPT_PF		2

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

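/*
 * Queue an exception for injection, merging it with any already-pending
 * exception according to the x86 rules: two contributory exceptions, or a
 * page fault followed by a non-benign exception, combine into #DF, and a
 * new exception on top of a pending #DF escalates to a triple fault.
 */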
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending) {
	queue:
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.reinject = reinject;
		return;
	}

	/* an exception is already pending; decide how the two combine */
	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* generate double fault per SDM Table 5-5 */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
	} else
		/* replace previous exception with a new one in a hope
		   that instruction re-execution will regenerate lost
		   exception */
		goto queue;
}

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err)
		kvm_inject_gp(vcpu, 0);
	else
		kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.cr2 = fault->address;
	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
	else
		vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	atomic_inc(&vcpu->arch.nmi_queued);
	kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

/*
 * This function will be used to read from the physical memory of the currently
 * running guest.  The difference to kvm_read_guest_page is that this function
 * can read from guest physical or from the guest's guest physical memory.
 */
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa	 = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
			       void *data, int offset, int len, u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

/*
 * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK|PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if (is_present_gpte(pdpte[i]) &&
		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_dirty);
out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
	bool changed = true;
	int offset;
	gfn_t gfn;
	int r;

	if (is_long_mode(vcpu) || !is_pae(vcpu))
		return false;

	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		return true;

	gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
	offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
				       PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (r < 0)
		goto out;
	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:

	return changed;
}

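/*
 * Emulate a guest write to CR0.  Returns 0 on success and 1 when the new
 * value is invalid for the current mode; the caller injects #GP in the
 * latter case.
 */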
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
				    X86_CR0_CD | X86_CR0_NW;

	cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return 1;
#endif

	cr0 &= ~CR0_RESERVED_BITS;

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
		return 1;

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
		return 1;

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu))
				return 1;
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l)
				return 1;
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
						 kvm_read_cr3(vcpu)))
			return 1;
	}

	if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
		return 1;

	kvm_x86_ops->set_cr0(vcpu, cr0);

	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
	}

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
			!vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
		vcpu->guest_xcr0_loaded = 1;
	}
}

static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (vcpu->guest_xcr0_loaded) {
		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
		vcpu->guest_xcr0_loaded = 0;
	}
}

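/*
 * Emulate XSETBV.  Only XCR0 (XCR_XFEATURE_ENABLED_MASK) is supported.
 * The new mask must keep FP set, may only enable YMM together with SSE,
 * must enable or disable the two MPX states together, and may not set
 * any feature bit the guest's CPUID does not advertise.
 */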
int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0 = xcr;
	u64 old_xcr0 = vcpu->arch.xcr0;
	u64 valid_bits;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	if (!(xcr0 & XSTATE_FP))
		return 1;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return 1;

	/*
	 * Do not allow the guest to set bits that we do not support
	 * saving.  However, xcr0 bit 0 is always set, even if the
	 * emulated CPU does not support XSAVE (see fx_init).
	 */
	valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
	if (xcr0 & ~valid_bits)
		return 1;

	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
		return 1;

	kvm_put_guest_xcr0(vcpu);
	vcpu->arch.xcr0 = xcr0;

	if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
		kvm_update_cpuid(vcpu);
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
	    __kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

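/*
 * Emulate a guest write to CR4, validating the new value against the
 * guest's CPUID features and the current paging mode.  Returns 0 on
 * success and 1 when the write should raise #GP.
 */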
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
				   X86_CR4_PAE | X86_CR4_SMEP;

	if (cr4 & CR4_RESERVED_BITS)
		return 1;

	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
		return 1;

	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
		return 1;

	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
		return 1;

	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
		   && ((cr4 ^ old_cr4) & pdptr_bits)
		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
				   kvm_read_cr3(vcpu)))
		return 1;

	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
		if (!guest_cpuid_has_pcid(vcpu))
			return 1;

		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
			return 1;
	}

	if (kvm_x86_ops->set_cr4(vcpu, cr4))
		return 1;

	if (((cr4 ^ old_cr4) & pdptr_bits) ||
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
		kvm_update_cpuid(vcpu);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
		kvm_mmu_sync_roots(vcpu);
		kvm_mmu_flush_tlb(vcpu);
		return 0;
	}

	if (is_long_mode(vcpu)) {
		if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
			if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
				return 1;
		} else
			if (cr3 & CR3_L_MODE_RESERVED_BITS)
				return 1;
	} else {
		if (is_pae(vcpu)) {
			if (cr3 & CR3_PAE_RESERVED_BITS)
				return 1;
			if (is_paging(vcpu) &&
			    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
				return 1;
		}
		/*
		 * We don't check reserved bits in nonpae mode, because
		 * this isn't enforced, and VMX depends on this.
		 */
	}

	vcpu->arch.cr3 = cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
	kvm_mmu_new_cr3(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS)
		return 1;
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
		kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
}

static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
	unsigned long dr7;

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
		dr7 = vcpu->arch.guest_debug_dr7;
	else
		dr7 = vcpu->arch.dr7;
	kvm_x86_ops->set_dr7(vcpu, dr7);
	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
	if (dr7 & DR7_BP_EN_MASK)
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}

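/*
 * Emulate MOV to a debug register.  Returns 0 on success, 1 when the
 * access should raise #UD (DR4/DR5 while CR4.DE is set) and -1 when it
 * should raise #GP (reserved upper bits set); kvm_set_dr() turns these
 * return values into the corresponding injected faults.
 */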
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	switch (dr) {
	case 0 ... 3:
		vcpu->arch.db[dr] = val;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = val;
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	case 6:
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
		kvm_update_dr6(vcpu);
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	default: /* 7 */
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
		kvm_update_dr7(vcpu);
		break;
	}

	return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	int res;

	res = __kvm_set_dr(vcpu, dr, val);
	if (res > 0)
		kvm_queue_exception(vcpu, UD_VECTOR);
	else if (res < 0)
		kvm_inject_gp(vcpu, 0);

	return res;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	switch (dr) {
	case 0 ... 3:
		*val = vcpu->arch.db[dr];
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	case 6:
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
			*val = vcpu->arch.dr6;
		else
			*val = kvm_x86_ops->get_dr6(vcpu);
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	default: /* 7 */
		*val = vcpu->arch.dr7;
		break;
	}

	return 0;
}

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	if (_kvm_get_dr(vcpu, dr, val)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

int kvm_rdpmc(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
	u64 data;
	int err;

	err = kvm_pmu_read_pmc(vcpu, ecx, &data);
	if (err)
		return err;
	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
	kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
	return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.  This capabilities test skips MSRs that
 * are kvm-specific.  Those are put in the beginning of the list.
 */
#define KVM_SAVE_MSRS_BEGIN	12
static u32 msrs_to_save[] = {
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
	MSR_KVM_PV_EOI_EN,
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
};

static unsigned num_msrs_to_save;

static const u32 emulated_msrs[] = {
	MSR_IA32_TSC_ADJUST,
	MSR_IA32_TSCDEADLINE,
	MSR_IA32_MISC_ENABLE,
	MSR_IA32_MCG_STATUS,
	MSR_IA32_MCG_CTL,
};

bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & efer_reserved_bits)
		return false;

	if (efer & EFER_FFXSR) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
			return false;
	}

	if (efer & EFER_SVME) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
			return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);

static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	u64 old_efer = vcpu->arch.efer;

	if (!kvm_valid_efer(vcpu, efer))
		return 1;

	if (is_paging(vcpu)
	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
		return 1;

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.efer & EFER_LMA;

	kvm_x86_ops->set_efer(vcpu, efer);

	/* Update reserved bits */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);

	return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	return kvm_x86_ops->set_msr(vcpu, msr);
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct msr_data msr;

	msr.data = *data;
	msr.index = index;
	msr.host_initiated = true;
	return kvm_set_msr(vcpu, &msr);
}

#ifdef CONFIG_X86_64
struct pvclock_gtod_data {
	seqcount_t	seq;

	struct { /* extract of a clocksource struct */
		int vclock_mode;
		cycle_t	cycle_last;
		cycle_t	mask;
		u32	mult;
		u32	shift;
	} clock;

	/* open coded 'struct timespec' */
	u64		monotonic_time_snsec;
	time_t		monotonic_time_sec;
};

static struct pvclock_gtod_data pvclock_gtod_data;

static void update_pvclock_gtod(struct timekeeper *tk)
{
	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;

	write_seqcount_begin(&vdata->seq);

	/* copy pvclock gtod data */
	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
	vdata->clock.cycle_last		= tk->clock->cycle_last;
	vdata->clock.mask		= tk->clock->mask;
	vdata->clock.mult		= tk->mult;
	vdata->clock.shift		= tk->shift;

	vdata->monotonic_time_sec	= tk->xtime_sec
					+ tk->wall_to_monotonic.tv_sec;
	vdata->monotonic_time_snsec	= tk->xtime_nsec
					+ (tk->wall_to_monotonic.tv_nsec
						<< tk->shift);
	while (vdata->monotonic_time_snsec >=
					(((u64)NSEC_PER_SEC) << tk->shift)) {
		vdata->monotonic_time_snsec -=
					((u64)NSEC_PER_SEC) << tk->shift;
		vdata->monotonic_time_sec++;
	}

	write_seqcount_end(&vdata->seq);
}
#endif

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec boot;

	if (!wall_clock)
		return;

	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
	if (r)
		return;

	if (version & 1)
		++version;  /* first time write, random junk */

	++version;

	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  guest system time equals host
	 * time for us, thus we must fill in host boot time here.
	 */
	getboottime(&boot);

	if (kvm->arch.kvmclock_offset) {
		struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
		boot = timespec_sub(boot, ts);
	}
	wc.sec = boot.tv_sec;
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	uint32_t quotient, remainder;

	/* Don't try to replace with do_div(), this one calculates
	 * "(dividend << 32) / divisor" */
	__asm__ ( "divl %4"
		  : "=a" (quotient), "=d" (remainder)
		  : "0" (0), "1" (dividend), "r" (divisor) );
	return quotient;
}

static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
			       s8 *pshift, u32 *pmultiplier)
{
	uint64_t scaled64;
	int32_t  shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = base_khz * 1000LL;
	scaled64 = scaled_khz * 1000LL;
	while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
		if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
			scaled64 >>= 1;
		else
			tps32 <<= 1;
		shift++;
	}

	*pshift = shift;
	*pmultiplier = div_frac(scaled64, tps32);

	pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
}

static inline u64 get_kernel_ns(void)
{
	struct timespec ts;

	ktime_get_ts(&ts);
	monotonic_to_bootbased(&ts);
	return timespec_to_ns(&ts);
}

#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;

static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
				   vcpu->arch.virtual_tsc_shift);
}

static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
	u64 v = (u64)khz * (1000000 + ppm);
	do_div(v, 1000000);
	return v;
}

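/*
 * Program the vcpu's virtual TSC frequency: precompute the scale used by
 * compute_guest_tsc() and decide whether hardware TSC scaling (or, failing
 * that, catchup compensation) is needed because the requested rate falls
 * outside the tolerance around the host TSC rate.
 */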
static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
	u32 thresh_lo, thresh_hi;
	int use_scaling = 0;

	/* tsc_khz can be zero if TSC calibration fails */
	if (this_tsc_khz == 0)
		return;

	/* Compute a scale to convert nanoseconds in TSC cycles */
	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
			   &vcpu->arch.virtual_tsc_shift,
			   &vcpu->arch.virtual_tsc_mult);
	vcpu->arch.virtual_tsc_khz = this_tsc_khz;

	/*
	 * Compute the variation in TSC rate which is acceptable
	 * within the range of tolerance and decide whether the
	 * rate being applied is within those bounds of the hardware
	 * rate.  If so, no scaling or compensation need be done.
	 */
	thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
	thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
	if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
		pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
		use_scaling = 1;
	}
	kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
				      vcpu->arch.virtual_tsc_mult,
				      vcpu->arch.virtual_tsc_shift);
	tsc += vcpu->arch.this_tsc_write;
	return tsc;
}

void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	bool vcpus_matched;
	bool do_request = false;
	struct kvm_arch *ka = &vcpu->kvm->arch;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&vcpu->kvm->online_vcpus));

	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
		if (!ka->use_master_clock)
			do_request = true;

	if (!vcpus_matched && ka->use_master_clock)
		do_request = true;

	if (do_request)
		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
			    atomic_read(&vcpu->kvm->online_vcpus),
			    ka->use_master_clock, gtod->clock.vclock_mode);
#endif
}

static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
	u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}

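/*
 * Handle a guest or host-initiated write to the guest TSC.  Writes that
 * land within one second of the value the current generation predicts are
 * treated as an attempt to synchronize with other vcpus and reuse (or
 * minimally adjust) the existing offset; anything else starts a new TSC
 * generation.  Matched vcpus are counted so the masterclock logic knows
 * whether all guest TSCs are in sync.
 */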
void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	struct kvm *kvm = vcpu->kvm;
	u64 offset, ns, elapsed;
	unsigned long flags;
	s64 usdiff;
	bool matched;
	u64 data = msr->data;

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
	ns = get_kernel_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;

	if (vcpu->arch.virtual_tsc_khz) {
		int faulted = 0;

		/* n.b - signed multiplication and division required */
		usdiff = data - kvm->arch.last_tsc_write;
#ifdef CONFIG_X86_64
		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
#else
		/* do_div() only does unsigned */
		asm("1: idivl %[divisor]\n"
		    "2: xor %%edx, %%edx\n"
		    "   movl $0, %[faulted]\n"
		    "3:\n"
		    ".section .fixup,\"ax\"\n"
		    "4: movl $1, %[faulted]\n"
		    "   jmp  3b\n"
		    ".previous\n"

		    _ASM_EXTABLE(1b, 4b)

		    : "=A"(usdiff), [faulted] "=r" (faulted)
		    : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));

#endif
		do_div(elapsed, 1000);
		usdiff -= elapsed;
		if (usdiff < 0)
			usdiff = -usdiff;

		/* idivl overflow => difference is larger than USEC_PER_SEC */
		if (faulted)
			usdiff = USEC_PER_SEC;
	} else
		usdiff = USEC_PER_SEC; /* disable TSC match window below */

	/*
	 * Special case: TSC write with a small delta (1 second) of virtual
	 * cycle time against real time is interpreted as an attempt to
	 * synchronize the CPU.
	 *
	 * For a reliable TSC, we can match TSC offsets, and for an unstable
	 * TSC, we add elapsed time in this computation.  We could let the
	 * compensation code attempt to catch up if we fall behind, but
	 * it's better to try to match offsets from the beginning.
	 */
	if (usdiff < USEC_PER_SEC &&
	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
		if (!check_tsc_unstable()) {
			offset = kvm->arch.cur_tsc_offset;
			pr_debug("kvm: matched tsc offset for %llu\n", data);
		} else {
			u64 delta = nsec_to_cycles(vcpu, elapsed);
			data += delta;
			offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
		}
		matched = true;
	} else {
		/*
		 * We split periods of matched TSC writes into generations.
		 * For each generation, we track the original measured
		 * nanosecond time, offset, and write, so if TSCs are in
		 * sync, we can match exact offset, and if not, we can match
		 * exact software computation in compute_guest_tsc()
		 *
		 * These values are tracked in kvm->arch.cur_xxx variables.
		 */
		kvm->arch.cur_tsc_generation++;
		kvm->arch.cur_tsc_nsec = ns;
		kvm->arch.cur_tsc_write = data;
		kvm->arch.cur_tsc_offset = offset;
		matched = false;
		pr_debug("kvm: new tsc generation %u, clock %llu\n",
			 kvm->arch.cur_tsc_generation, data);
	}

	/*
	 * We also track the most recent recorded KHZ, write and time to
	 * allow the matching interval to be extended at each write.
	 */
	kvm->arch.last_tsc_nsec = ns;
	kvm->arch.last_tsc_write = data;
	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;

	vcpu->arch.last_guest_tsc = data;

	/* Keep track of which generation this VCPU has synchronized to */
	vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;

	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
		update_ia32_tsc_adjust_msr(vcpu, offset);
	kvm_x86_ops->write_tsc_offset(vcpu, offset);
	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
	if (matched)
		kvm->arch.nr_vcpus_matched_tsc++;
	else
		kvm->arch.nr_vcpus_matched_tsc = 0;

	kvm_track_tsc_matching(vcpu);
	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);

#ifdef CONFIG_X86_64

static cycle_t read_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)vget_cycles();

	last = pvclock_gtod_data.clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

static inline u64 vgettsc(cycle_t *cycle_now)
{
	long v;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	*cycle_now = read_tsc();

	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
	return v * gtod->clock.mult;
}

static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
{
	unsigned long seq;
	u64 ns;
	int mode;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	ts->tv_nsec = 0;
	do {
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgettsc(cycle_now);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	timespec_add_ns(ts, ns);

	return mode;
}

/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
{
	struct timespec ts;

	/* checked again under seqlock below */
	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
		return false;

	if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
		return false;

	monotonic_to_bootbased(&ts);
	*kernel_ns = timespec_to_ns(&ts);

	return true;
}
#endif

/*
 *
 * Assuming a stable TSC across physical CPUS, and a stable TSC
 * across virtual CPUs, the following condition is possible.
 * Each numbered line represents an event visible to both
 * CPUs at the next numbered event.
 *
 * "timespecX" represents host monotonic time. "tscX" represents
 * RDTSC value.
 *
 *		VCPU0 on CPU0		|	VCPU1 on CPU1
 *
 * 1.  read timespec0,tsc0
 * 2.					| timespec1 = timespec0 + N
 *					| tsc1 = tsc0 + M
 * 3. transition to guest		| transition to guest
 * 4. ret0 = timespec0 + (rdtsc - tsc0)	|
 * 5.					| ret1 = timespec1 + (rdtsc - tsc1)
 *					|      = timespec0 + N + (rdtsc - (tsc0 + M))
 *
 * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
 *
 *	- ret0 < ret1
 *	- timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
 *	- 0 < N - M => M < N
 *
 * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
 * always the case (the difference between two distinct xtime instances
 * might be smaller than the difference between corresponding TSC reads,
 * when updating guest vcpus pvclock areas).
 *
 * To avoid that problem, do not allow visibility of distinct
 * system_timestamp/tsc_timestamp values simultaneously: use a master
 * copy of host monotonic time values. Update that master copy
 * in lockstep.
 *
 * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
 *
 */
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	struct kvm_arch *ka = &kvm->arch;
	int vclock_mode;
	bool host_tsc_clocksource, vcpus_matched;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&kvm->online_vcpus));

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	host_tsc_clocksource = kvm_get_time_and_clockread(
					&ka->master_kernel_ns,
					&ka->master_cycle_now);

	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
				&& !backwards_tsc_observed;

	if (ka->use_master_clock)
		atomic_set(&kvm_guest_has_master_clock, 1);

	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
				      vcpus_matched);
#endif
}

static void kvm_gen_update_masterclock(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	int i;
	struct kvm_vcpu *vcpu;
	struct kvm_arch *ka = &kvm->arch;

	spin_lock(&ka->pvclock_gtod_sync_lock);
	kvm_make_mclock_inprogress_request(kvm);
	/* no guest entries from this point */
	pvclock_update_vm_gtod_copy(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm)
		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);

	/* guest entries allowed */
	kvm_for_each_vcpu(i, vcpu, kvm)
		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);

	spin_unlock(&ka->pvclock_gtod_sync_lock);
#endif
}

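/*
 * Refresh this vcpu's pvclock area: sample host time and TSC (from the
 * master clock when it is in use), apply catchup if the guest TSC lags,
 * recompute the scaling factors when the host CPU frequency has changed,
 * and publish the result to the guest with an even version number.
 */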
static int kvm_guest_time_update(struct kvm_vcpu *v)
{
	unsigned long flags, this_tsc_khz;
	struct kvm_vcpu_arch *vcpu = &v->arch;
	struct kvm_arch *ka = &v->kvm->arch;
	s64 kernel_ns;
	u64 tsc_timestamp, host_tsc;
	struct pvclock_vcpu_time_info guest_hv_clock;
	u8 pvclock_flags;
	bool use_master_clock;

	kernel_ns = 0;
	host_tsc = 0;

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	spin_lock(&ka->pvclock_gtod_sync_lock);
	use_master_clock = ka->use_master_clock;
	if (use_master_clock) {
		host_tsc = ka->master_cycle_now;
		kernel_ns = ka->master_kernel_ns;
	}
	spin_unlock(&ka->pvclock_gtod_sync_lock);

	/* Keep irq disabled to prevent changes to the clock */
	local_irq_save(flags);
	this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
	if (unlikely(this_tsc_khz == 0)) {
		local_irq_restore(flags);
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
		return 1;
	}
	if (!use_master_clock) {
		host_tsc = native_read_tsc();
		kernel_ns = get_kernel_ns();
	}

	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);

	/*
	 * We may have to catch up the TSC to match elapsed wallclock time,
	 * if the TSC was sampled more recently than the previously recorded
	 * wallclock time: the guest TSC must never appear to run behind the
	 * value compute_guest_tsc() derives from elapsed time, so push the
	 * TSC offset forward when it does.
	 */
	if (vcpu->tsc_catchup) {
		u64 tsc = compute_guest_tsc(v, kernel_ns);
		if (tsc > tsc_timestamp) {
			adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
			tsc_timestamp = tsc;
		}
	}

	local_irq_restore(flags);

	if (!vcpu->pv_time_enabled)
		return 0;

	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
				   &vcpu->hv_clock.tsc_shift,
				   &vcpu->hv_clock.tsc_to_system_mul);
		vcpu->hw_tsc_khz = this_tsc_khz;
	}

	/* With all the info we got, fill in the values */
	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
	vcpu->last_guest_tsc = tsc_timestamp;

	/*
	 * The interface expects us to write an even number signaling that the
	 * update is finished.  Since the guest won't see the intermediate
	 * state, we just increase by 2 at the end.
	 */
	vcpu->hv_clock.version += 2;

	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
		&guest_hv_clock, sizeof(guest_hv_clock))))
		return 0;

	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);

	if (vcpu->pvclock_set_guest_stopped_request) {
		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
		vcpu->pvclock_set_guest_stopped_request = false;
	}

	/* If the host uses TSC clocksource, then it is stable */
	if (use_master_clock)
		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;

	vcpu->hv_clock.flags = pvclock_flags;

	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
				&vcpu->hv_clock,
				sizeof(vcpu->hv_clock));
	return 0;
}

/*
 * kvmclock updates which are isolated to a given vcpu, such as
 * vcpu->cpu migration, should not allow system_timestamp from
 * the rest of the vcpus to remain static.  Otherwise ntp frequency
 * correction applies to one vcpu's system_timestamp but not
 * the others.
 *
 * So in those cases, request a kvmclock update for all vcpus.
 *
 * Worst case for a remote vcpu to update its kvmclock is then
 * bounded by maximum nohz sleep latency.
 */
#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)

static void kvmclock_update_fn(struct work_struct *work)
{
	int i;
	struct delayed_work *dwork = to_delayed_work(work);
	struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
					   kvmclock_update_work);
	struct kvm *kvm = container_of(ka, struct kvm, arch);
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
		kvm_vcpu_kick(vcpu);
	}
}

static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
{
	struct kvm *kvm = v->kvm;

	set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
	schedule_delayed_work(&kvm->arch.kvmclock_update_work,
			      KVMCLOCK_UPDATE_DELAY);
}

#define KVMCLOCK_SYNC_PERIOD (300 * HZ)

static void kvmclock_sync_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
					   kvmclock_sync_work);
	struct kvm *kvm = container_of(ka, struct kvm, arch);

	schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
	schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
			      KVMCLOCK_SYNC_PERIOD);
}

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	case 0x2f8:
		return true;
	}
	return false;
}

static bool valid_pat_type(unsigned t)
{
	return t < 8 && (1 << t) & 0xf3;	/* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73;	/* 0, 1, 4, 5, 6 */
}

static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (i = 0; i < 8; i++)
			if (!valid_pat_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	return valid_mtrr_type(data & 0xff);
}

static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

	if (!mtrr_valid(vcpu, msr, data))
		return 1;

	if (msr == MSR_MTRRdefType) {
		vcpu->arch.mtrr_state.def_type = data;
		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
	} else if (msr == MSR_MTRRfix64K_00000)
		p[0] = data;
	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
		p[1 + msr - MSR_MTRRfix16K_80000] = data;
	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else {	/* Variable MTRRs */
		int idx, is_mtrr_mask;
		u64 *pt;

		idx = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * idx;
		if (!is_mtrr_mask)
			pt = (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
		else
			pt = (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
		*pt = data;
	}

	kvm_mmu_reset_context(vcpu);
	return 0;
}

static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;

	switch (msr) {
	case MSR_IA32_MCG_STATUS:
		vcpu->arch.mcg_status = data;
		break;
	case MSR_IA32_MCG_CTL:
		if (!(mcg_cap & MCG_CTL_P))
			return 1;
		if (data != 0 && data != ~(u64)0)
			return -1;
		vcpu->arch.mcg_ctl = data;
		break;
	default:
		if (msr >= MSR_IA32_MC0_CTL &&
		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			/* only 0 or all 1s can be written to IA32_MCi_CTL
			 * some Linux kernels though clear bit 10 in bank 4 to
			 * workaround a BIOS/GART TBL issue on AMD K8s, allow
			 * this to avoid an uncaught #GP in the guest
			 */
			if ((offset & 0x3) == 0 &&
			    data != 0 && (data | (1 << 10)) != ~(u64)0)
				return -1;
			vcpu->arch.mce_banks[offset] = data;
			break;
		}
		return 1;
	}
	return 0;
}

static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	int lm = is_long_mode(vcpu);
	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
		: kvm->arch.xen_hvm_config.blob_size_32;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	u8 *page;
	int r;

	r = -E2BIG;
	if (page_num >= blob_size)
		goto out;
	r = -ENOMEM;
	page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
	if (IS_ERR(page)) {
		r = PTR_ERR(page);
		goto out;
	}
	if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
		goto out_free;
	r = 0;
out_free:
	kfree(page);
out:
	return r;
}

static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
		r = true;
		break;
	}

	return r;
}

static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm *kvm = vcpu->kvm;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		kvm->arch.hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!kvm->arch.hv_guest_os_id)
			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!kvm->arch.hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			kvm->arch.hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		kvm->arch.hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC: {
		u64 gfn;
		HV_REFERENCE_TSC_PAGE tsc_ref;
		memset(&tsc_ref, 0, sizeof(tsc_ref));
		kvm->arch.hv_tsc_page = data;
		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
			break;
		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
		if (kvm_write_guest(kvm, data,
			&tsc_ref, sizeof(tsc_ref)))
			return 1;
		mark_page_dirty(kvm, gfn);
		break;
	}
	default:
		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			    "data 0x%llx\n", msr, data);
		return 1;
	}
	return 0;
}

static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case HV_X64_MSR_APIC_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
			vcpu->arch.hv_vapic = data;
			break;
		}
		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(vcpu->kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		vcpu->arch.hv_vapic = data;
		mark_page_dirty(vcpu->kvm, gfn);
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	default:
		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			    "data 0x%llx\n", msr, data);
		return 1;
	}

	return 0;
}

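/*
 * Handle a guest write to MSR_KVM_ASYNC_PF_EN: validate the reserved
 * bits, map the 32-bit "reason" word the guest registered, and either
 * tear down or (re)arm the async page fault machinery accordingly.
 */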
static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
	gpa_t gpa = data & ~0x3f;

	/* Bits 2:5 are reserved, should be zero */
	if (data & 0x3c)
		return 1;

	vcpu->arch.apf.msr_val = data;

	if (!(data & KVM_ASYNC_PF_ENABLED)) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
		return 0;
	}

	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
					sizeof(u32)))
		return 1;

	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
	kvm_async_pf_wakeup_all(vcpu);
	return 0;
}

static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pv_time_enabled = false;
}

static void accumulate_steal_time(struct kvm_vcpu *vcpu)
{
	u64 delta;

	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
		return;

	delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
	vcpu->arch.st.last_steal = current->sched_info.run_delay;
	vcpu->arch.st.accum_steal = delta;
}

static void record_steal_time(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
		return;

	if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
		return;

	vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
	vcpu->arch.st.steal.version += 2;
	vcpu->arch.st.accum_steal = 0;

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
		&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
}

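/*
 * Generic WRMSR emulation shared by VMX and SVM.  Returns 0 when the
 * write was handled (or deliberately ignored) and 1 when it should raise
 * #GP in the guest; unknown MSRs are either rejected or logged and
 * ignored, depending on the ignore_msrs module parameter.
 */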
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	bool pr = false;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;

	switch (msr) {
	case MSR_AMD64_NB_CFG:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
	case MSR_VM_HSAVE_PA:
	case MSR_AMD64_PATCH_LOADER:
	case MSR_AMD64_BU_CFG2:
		break;

	case MSR_EFER:
		return set_efer(vcpu, data);
	case MSR_K7_HWCR:
		data &= ~(u64)0x40;	/* ignore flush filter disable */
		data &= ~(u64)0x100;	/* ignore ignne emulation enable */
		data &= ~(u64)0x8;	/* ignore TLB cache disable */
		if (data != 0) {
			vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
				    data);
			return 1;
		}
		break;
	case MSR_FAM10H_MMIO_CONF_BASE:
		if (data != 0) {
			vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
				    "0x%llx\n", data);
			return 1;
		}
		break;
	case MSR_IA32_DEBUGCTLMSR:
		if (!data) {
			/* We support the non-activated case already */
			break;
		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/* Values other than LBR and BTF are vendor-specific,
			   thus reserved and should throw a #GP */
			return 1;
		}
		vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
			    __func__, data);
		break;
	case 0x200 ... 0x2ff:
		return set_msr_mtrr(vcpu, msr, data);
	case MSR_IA32_APICBASE:
		return kvm_set_apic_base(vcpu, msr_info);
	case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_write(vcpu, msr, data);
	case MSR_IA32_TSCDEADLINE:
		kvm_set_lapic_tscdeadline_msr(vcpu, data);
		break;
	case MSR_IA32_TSC_ADJUST:
		if (guest_cpuid_has_tsc_adjust(vcpu)) {
			if (!msr_info->host_initiated) {
				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
			}
			vcpu->arch.ia32_tsc_adjust_msr = data;
		}
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->arch.ia32_misc_enable_msr = data;
		break;
	case MSR_KVM_WALL_CLOCK_NEW:
	case MSR_KVM_WALL_CLOCK:
		vcpu->kvm->arch.wall_clock = data;
		kvm_write_wall_clock(vcpu->kvm, data);
		break;
	case MSR_KVM_SYSTEM_TIME_NEW:
	case MSR_KVM_SYSTEM_TIME: {
		u64 gpa_offset;
		kvmclock_reset(vcpu);

		vcpu->arch.time = data;
		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);

		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;

		gpa_offset = data & ~(PAGE_MASK | 1);

		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
		     &vcpu->arch.pv_time, data & ~1ULL,
		     sizeof(struct pvclock_vcpu_time_info)))
			vcpu->arch.pv_time_enabled = false;
		else
			vcpu->arch.pv_time_enabled = true;

		break;
	}
	case MSR_KVM_ASYNC_PF_EN:
		if (kvm_pv_enable_async_pf(vcpu, data))
			return 1;
		break;
	case MSR_KVM_STEAL_TIME:

		if (unlikely(!sched_info_on()))
			return 1;

		if (data & KVM_STEAL_RESERVED_MASK)
			return 1;

		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
						data & KVM_STEAL_VALID_BITS,
						sizeof(struct kvm_steal_time)))
			return 1;

		vcpu->arch.st.msr_val = data;

		if (!(data & KVM_MSR_ENABLED))
			break;

		vcpu->arch.st.last_steal = current->sched_info.run_delay;

		preempt_disable();
		accumulate_steal_time(vcpu);
		preempt_enable();

		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

		break;
	case MSR_KVM_PV_EOI_EN:
		if (kvm_lapic_enable_pv_eoi(vcpu, data))
			return 1;
		break;

	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
		return set_msr_mce(vcpu, msr, data);

	/* Performance counters are not protected by a CPUID bit, so we
	 * should check all of them in the generic path for the sake of
	 * cross vendor migration.  Writing a zero into the event select
	 * MSRs is harmless, so only non-zero writes are reported as
	 * unimplemented.
	 */
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		if (data != 0)
			vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
				    "0x%x data 0x%llx\n", msr, data);
		break;
	/* at least RHEL 4 unconditionally writes to the perfctr registers,
	 * so we ignore writes to make it happy.
	 */
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
			    "0x%x data 0x%llx\n", msr, data);
		break;
	case MSR_P6_PERFCTR0:
	case MSR_P6_PERFCTR1:
		pr = true;
		/* fall through */
	case MSR_P6_EVNTSEL0:
	case MSR_P6_EVNTSEL1:
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);

		if (pr || data != 0)
			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
				    "0x%x data 0x%llx\n", msr, data);
		break;
	case MSR_K7_CLK_CTL:
		/*
		 * Ignore all writes to this no longer documented MSR.
		 * Writes are only relevant for old K7 processors,
		 * all pre-dating SVM, but a recommended workaround from
		 * AMD for these chips.  It is possible to specify the
		 * affected processor models on the command line, hence
		 * the need to ignore the workaround.
		 */
		break;
	case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
		if (kvm_hv_msr_partition_wide(msr)) {
			int r;
			mutex_lock(&vcpu->kvm->lock);
			r = set_msr_hyperv_pw(vcpu, msr, data);
			mutex_unlock(&vcpu->kvm->lock);
			return r;
		} else
			return set_msr_hyperv(vcpu, msr, data);
		break;
	case MSR_IA32_BBL_CR_CTL3:
		/* Drop writes to this legacy MSR -- see rdmsr
		 * counterpart for further detail.
		 */
		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
		break;
	case MSR_AMD64_OSVW_ID_LENGTH:
		if (!guest_cpuid_has_osvw(vcpu))
			return 1;
		vcpu->arch.osvw.length = data;
		break;
	case MSR_AMD64_OSVW_STATUS:
		if (!guest_cpuid_has_osvw(vcpu))
			return 1;
		vcpu->arch.osvw.status = data;
		break;
	default:
		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
			return xen_hvm_config(vcpu, data);
		if (kvm_pmu_msr(vcpu, msr))
			return kvm_pmu_set_msr(vcpu, msr_info);
		if (!ignore_msrs) {
			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
				    msr, data);
			return 1;
		} else {
			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
				    msr, data);
			break;
		}
	}
	return 0;
}
2250EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2251
2252
2253
2254
2255
2256
2257
2258int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2259{
2260 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
2261}
2262
2263static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2264{
2265 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
2266
2267 if (!msr_mtrr_valid(msr))
2268 return 1;
2269
2270 if (msr == MSR_MTRRdefType)
2271 *pdata = vcpu->arch.mtrr_state.def_type +
2272 (vcpu->arch.mtrr_state.enabled << 10);
2273 else if (msr == MSR_MTRRfix64K_00000)
2274 *pdata = p[0];
2275 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
2276 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
2277 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
2278 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
2279 else if (msr == MSR_IA32_CR_PAT)
2280 *pdata = vcpu->arch.pat;
2281 else {
2282 int idx, is_mtrr_mask;
2283 u64 *pt;
2284
2285 idx = (msr - 0x200) / 2;
2286 is_mtrr_mask = msr - 0x200 - 2 * idx;
2287 if (!is_mtrr_mask)
2288 pt =
2289 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
2290 else
2291 pt =
2292 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
2293 *pdata = *pt;
2294 }
2295
2296 return 0;
2297}
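
/*
 * Editor's note -- worked example for the variable-range decode above
 * (illustrative only, not part of the original source).  The variable
 * MTRRs start at MSR 0x200 and come in base/mask pairs, so for
 * msr = 0x20b (the sixth mask register, IA32_MTRR_PHYSMASK5):
 *
 *	idx          = (0x20b - 0x200) / 2 = 5
 *	is_mtrr_mask = 0x20b - 0x200 - 2 * 5 = 1   (odd => mask register)
 *
 * and *pdata is filled from var_ranges[5].mask_lo.
 */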
2298
2299static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2300{
2301 u64 data;
2302 u64 mcg_cap = vcpu->arch.mcg_cap;
2303 unsigned bank_num = mcg_cap & 0xff;
2304
2305 switch (msr) {
2306 case MSR_IA32_P5_MC_ADDR:
2307 case MSR_IA32_P5_MC_TYPE:
2308 data = 0;
2309 break;
2310 case MSR_IA32_MCG_CAP:
2311 data = vcpu->arch.mcg_cap;
2312 break;
2313 case MSR_IA32_MCG_CTL:
2314 if (!(mcg_cap & MCG_CTL_P))
2315 return 1;
2316 data = vcpu->arch.mcg_ctl;
2317 break;
2318 case MSR_IA32_MCG_STATUS:
2319 data = vcpu->arch.mcg_status;
2320 break;
2321 default:
2322 if (msr >= MSR_IA32_MC0_CTL &&
2323 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
2324 u32 offset = msr - MSR_IA32_MC0_CTL;
2325 data = vcpu->arch.mce_banks[offset];
2326 break;
2327 }
2328 return 1;
2329 }
2330 *pdata = data;
2331 return 0;
2332}
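
/*
 * Editor's note (illustrative, not part of the original source): each
 * machine-check bank occupies four consecutive MSRs starting at
 * MSR_IA32_MC0_CTL, in the order CTL, STATUS, ADDR, MISC.  The default
 * case above therefore maps, e.g., MSR_IA32_MC1_STATUS to offset
 * 4 * 1 + 1 = 5 within vcpu->arch.mce_banks[].
 */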
2333
2334static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2335{
2336 u64 data = 0;
2337 struct kvm *kvm = vcpu->kvm;
2338
2339 switch (msr) {
2340 case HV_X64_MSR_GUEST_OS_ID:
2341 data = kvm->arch.hv_guest_os_id;
2342 break;
2343 case HV_X64_MSR_HYPERCALL:
2344 data = kvm->arch.hv_hypercall;
2345 break;
2346 case HV_X64_MSR_TIME_REF_COUNT: {
2347 data =
2348 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2349 break;
2350 }
2351 case HV_X64_MSR_REFERENCE_TSC:
2352 data = kvm->arch.hv_tsc_page;
2353 break;
2354 default:
2355 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2356 return 1;
2357 }
2358
2359 *pdata = data;
2360 return 0;
2361}
2362
2363static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2364{
2365 u64 data = 0;
2366
2367 switch (msr) {
2368 case HV_X64_MSR_VP_INDEX: {
2369 int r;
2370 struct kvm_vcpu *v;
2371 kvm_for_each_vcpu(r, v, vcpu->kvm) {
2372 if (v == vcpu) {
2373 data = r;
2374 break;
2375 }
2376 }
2377 break;
2378 }
2379 case HV_X64_MSR_EOI:
2380 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
2381 case HV_X64_MSR_ICR:
2382 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
2383 case HV_X64_MSR_TPR:
2384 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
2385 case HV_X64_MSR_APIC_ASSIST_PAGE:
2386 data = vcpu->arch.hv_vapic;
2387 break;
2388 default:
2389 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2390 return 1;
2391 }
2392 *pdata = data;
2393 return 0;
2394}
2395
2396int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2397{
2398 u64 data;
2399
2400 switch (msr) {
2401 case MSR_IA32_PLATFORM_ID:
2402 case MSR_IA32_EBL_CR_POWERON:
2403 case MSR_IA32_DEBUGCTLMSR:
2404 case MSR_IA32_LASTBRANCHFROMIP:
2405 case MSR_IA32_LASTBRANCHTOIP:
2406 case MSR_IA32_LASTINTFROMIP:
2407 case MSR_IA32_LASTINTTOIP:
2408 case MSR_K8_SYSCFG:
2409 case MSR_K7_HWCR:
2410 case MSR_VM_HSAVE_PA:
2411 case MSR_K7_EVNTSEL0:
2412 case MSR_K7_PERFCTR0:
2413 case MSR_K8_INT_PENDING_MSG:
2414 case MSR_AMD64_NB_CFG:
2415 case MSR_FAM10H_MMIO_CONF_BASE:
2416 case MSR_AMD64_BU_CFG2:
2417 data = 0;
2418 break;
2419 case MSR_P6_PERFCTR0:
2420 case MSR_P6_PERFCTR1:
2421 case MSR_P6_EVNTSEL0:
2422 case MSR_P6_EVNTSEL1:
2423 if (kvm_pmu_msr(vcpu, msr))
2424 return kvm_pmu_get_msr(vcpu, msr, pdata);
2425 data = 0;
2426 break;
2427 case MSR_IA32_UCODE_REV:
2428 data = 0x100000000ULL;
2429 break;
2430 case MSR_MTRRcap:
2431 data = 0x500 | KVM_NR_VAR_MTRR;
2432 break;
2433 case 0x200 ... 0x2ff:
2434 return get_msr_mtrr(vcpu, msr, pdata);
2435 case 0xcd: /* fsb frequency */
2436 data = 3;
2437 break;
2438
2439
2440 /*
2441 * MSR_EBC_FREQUENCY_ID
2442 * Return a conservative, fixed value (1 << 24) that is plausible
2443 * for even the basic CPU models, instead of reporting a real
2444 * front-side-bus frequency: the bus-speed field in bits 23:21 is
2445 * left at zero and only bit 24 is set.  Guests that read this MSR
2446 * generally only sanity-check the value, so a benign constant
2447 * is sufficient.
2448 */
2449 case MSR_EBC_FREQUENCY_ID:
2450 data = 1 << 24;
2451 break;
2452 case MSR_IA32_APICBASE:
2453 data = kvm_get_apic_base(vcpu);
2454 break;
2455 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2456 return kvm_x2apic_msr_read(vcpu, msr, pdata);
2457 break;
2458 case MSR_IA32_TSCDEADLINE:
2459 data = kvm_get_lapic_tscdeadline_msr(vcpu);
2460 break;
2461 case MSR_IA32_TSC_ADJUST:
2462 data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2463 break;
2464 case MSR_IA32_MISC_ENABLE:
2465 data = vcpu->arch.ia32_misc_enable_msr;
2466 break;
2467 case MSR_IA32_PERF_STATUS:
2468 /* TSC increment by tick */
2469 data = 1000ULL;
2470 /* CPU multiplier */
2471 data |= (((uint64_t)4ULL) << 40);
2472 break;
2473 case MSR_EFER:
2474 data = vcpu->arch.efer;
2475 break;
2476 case MSR_KVM_WALL_CLOCK:
2477 case MSR_KVM_WALL_CLOCK_NEW:
2478 data = vcpu->kvm->arch.wall_clock;
2479 break;
2480 case MSR_KVM_SYSTEM_TIME:
2481 case MSR_KVM_SYSTEM_TIME_NEW:
2482 data = vcpu->arch.time;
2483 break;
2484 case MSR_KVM_ASYNC_PF_EN:
2485 data = vcpu->arch.apf.msr_val;
2486 break;
2487 case MSR_KVM_STEAL_TIME:
2488 data = vcpu->arch.st.msr_val;
2489 break;
2490 case MSR_KVM_PV_EOI_EN:
2491 data = vcpu->arch.pv_eoi.msr_val;
2492 break;
2493 case MSR_IA32_P5_MC_ADDR:
2494 case MSR_IA32_P5_MC_TYPE:
2495 case MSR_IA32_MCG_CAP:
2496 case MSR_IA32_MCG_CTL:
2497 case MSR_IA32_MCG_STATUS:
2498 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2499 return get_msr_mce(vcpu, msr, pdata);
2500 case MSR_K7_CLK_CTL:
2501 /*
2502 * Provide expected ramp-up count for K7. All other
2503 * are set to zero, indicating minimum divisors for
2504 * every field.
2505 *
2506 * This prevents guest kernels on AMD host with CPU
2507 * type 6, model 8 and higher from exploding due to
2508 * the rdmsr failing.
2509 */
2510 data = 0x20000000;
2511 break;
2512 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2513 if (kvm_hv_msr_partition_wide(msr)) {
2514 int r;
2515 mutex_lock(&vcpu->kvm->lock);
2516 r = get_msr_hyperv_pw(vcpu, msr, pdata);
2517 mutex_unlock(&vcpu->kvm->lock);
2518 return r;
2519 } else
2520 return get_msr_hyperv(vcpu, msr, pdata);
2521 break;
2522 case MSR_IA32_BBL_CR_CTL3:
2523 /* This legacy MSR exists but isn't fully documented in current
2524 * silicon.  It is however accessed by winxp in very narrow
2525 * scenarios where it sets bit #19, itself documented as
2526 * a "reserved" bit.  Best effort attempt to source coherent
2527 * read data here should the balance of the register be
2528 * interpreted by the guest:
2529 *
2530 * L2 cache control register 3: 64GB range, 256KB size,
2531 * enabled, latency 0x1, configured
2532 */
2533 data = 0xbe702111;
2534 break;
2535 case MSR_AMD64_OSVW_ID_LENGTH:
2536 if (!guest_cpuid_has_osvw(vcpu))
2537 return 1;
2538 data = vcpu->arch.osvw.length;
2539 break;
2540 case MSR_AMD64_OSVW_STATUS:
2541 if (!guest_cpuid_has_osvw(vcpu))
2542 return 1;
2543 data = vcpu->arch.osvw.status;
2544 break;
2545 default:
2546 if (kvm_pmu_msr(vcpu, msr))
2547 return kvm_pmu_get_msr(vcpu, msr, pdata);
2548 if (!ignore_msrs) {
2549 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
2550 return 1;
2551 } else {
2552 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
2553 data = 0;
2554 }
2555 break;
2556 }
2557 *pdata = data;
2558 return 0;
2559}
2560EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2561
2562/*
2563 * Read or write a bunch of msrs. All parameters are kernel addresses.
2564 *
2565 * @return number of msrs set successfully.
2566 */
2567static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2568 struct kvm_msr_entry *entries,
2569 int (*do_msr)(struct kvm_vcpu *vcpu,
2570 unsigned index, u64 *data))
2571{
2572 int i, idx;
2573
2574 idx = srcu_read_lock(&vcpu->kvm->srcu);
2575 for (i = 0; i < msrs->nmsrs; ++i)
2576 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2577 break;
2578 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2579
2580 return i;
2581}
2582
2583/*
2584 * Read or write a bunch of msrs. Parameters are user addresses.
2585 *
2586 * @return number of msrs set successfully.
2587 */
2588static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2589 int (*do_msr)(struct kvm_vcpu *vcpu,
2590 unsigned index, u64 *data),
2591 int writeback)
2592{
2593 struct kvm_msrs msrs;
2594 struct kvm_msr_entry *entries;
2595 int r, n;
2596 unsigned size;
2597
2598 r = -EFAULT;
2599 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2600 goto out;
2601
2602 r = -E2BIG;
2603 if (msrs.nmsrs >= MAX_IO_MSRS)
2604 goto out;
2605
2606 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2607 entries = memdup_user(user_msrs->entries, size);
2608 if (IS_ERR(entries)) {
2609 r = PTR_ERR(entries);
2610 goto out;
2611 }
2612
2613 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2614 if (r < 0)
2615 goto out_free;
2616
2617 r = -EFAULT;
2618 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2619 goto out_free;
2620
2621 r = n;
2622
2623out_free:
2624 kfree(entries);
2625out:
2626 return r;
2627}
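
/*
 * Editor's note -- hypothetical userspace caller, shown only to
 * illustrate the KVM_GET_MSRS layout that msr_io() consumes:
 *
 *	struct {
 *		struct kvm_msrs hdr;
 *		struct kvm_msr_entry entries[1];
 *	} req = { .hdr.nmsrs = 1, .entries[0].index = MSR_EFER };
 *
 *	int n = ioctl(vcpu_fd, KVM_GET_MSRS, &req);
 *
 * On success n is the number of entries processed and entries[0].data
 * holds the guest's EFER value.
 */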
2628
2629int kvm_dev_ioctl_check_extension(long ext)
2630{
2631 int r;
2632
2633 switch (ext) {
2634 case KVM_CAP_IRQCHIP:
2635 case KVM_CAP_HLT:
2636 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2637 case KVM_CAP_SET_TSS_ADDR:
2638 case KVM_CAP_EXT_CPUID:
2639 case KVM_CAP_EXT_EMUL_CPUID:
2640 case KVM_CAP_CLOCKSOURCE:
2641 case KVM_CAP_PIT:
2642 case KVM_CAP_NOP_IO_DELAY:
2643 case KVM_CAP_MP_STATE:
2644 case KVM_CAP_SYNC_MMU:
2645 case KVM_CAP_USER_NMI:
2646 case KVM_CAP_REINJECT_CONTROL:
2647 case KVM_CAP_IRQ_INJECT_STATUS:
2648 case KVM_CAP_IRQFD:
2649 case KVM_CAP_IOEVENTFD:
2650 case KVM_CAP_PIT2:
2651 case KVM_CAP_PIT_STATE2:
2652 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2653 case KVM_CAP_XEN_HVM:
2654 case KVM_CAP_ADJUST_CLOCK:
2655 case KVM_CAP_VCPU_EVENTS:
2656 case KVM_CAP_HYPERV:
2657 case KVM_CAP_HYPERV_VAPIC:
2658 case KVM_CAP_HYPERV_SPIN:
2659 case KVM_CAP_PCI_SEGMENT:
2660 case KVM_CAP_DEBUGREGS:
2661 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2662 case KVM_CAP_XSAVE:
2663 case KVM_CAP_ASYNC_PF:
2664 case KVM_CAP_GET_TSC_KHZ:
2665 case KVM_CAP_KVMCLOCK_CTRL:
2666 case KVM_CAP_READONLY_MEM:
2667 case KVM_CAP_HYPERV_TIME:
2668 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2669#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2670 case KVM_CAP_ASSIGN_DEV_IRQ:
2671 case KVM_CAP_PCI_2_3:
2672#endif
2673 r = 1;
2674 break;
2675 case KVM_CAP_COALESCED_MMIO:
2676 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2677 break;
2678 case KVM_CAP_VAPIC:
2679 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2680 break;
2681 case KVM_CAP_NR_VCPUS:
2682 r = KVM_SOFT_MAX_VCPUS;
2683 break;
2684 case KVM_CAP_MAX_VCPUS:
2685 r = KVM_MAX_VCPUS;
2686 break;
2687 case KVM_CAP_NR_MEMSLOTS:
2688 r = KVM_USER_MEM_SLOTS;
2689 break;
2690 case KVM_CAP_PV_MMU:
2691 r = 0;
2692 break;
2693#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2694 case KVM_CAP_IOMMU:
2695 r = iommu_present(&pci_bus_type);
2696 break;
2697#endif
2698 case KVM_CAP_MCE:
2699 r = KVM_MAX_MCE_BANKS;
2700 break;
2701 case KVM_CAP_XCRS:
2702 r = cpu_has_xsave;
2703 break;
2704 case KVM_CAP_TSC_CONTROL:
2705 r = kvm_has_tsc_control;
2706 break;
2707 case KVM_CAP_TSC_DEADLINE_TIMER:
2708 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2709 break;
2710 default:
2711 r = 0;
2712 break;
2713 }
2714 return r;
2715
2716}
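
/*
 * Editor's note -- hypothetical userspace probe, illustrating how the
 * capability values computed above are consumed:
 *
 *	int soft_max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
 *	int hard_max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *
 * A well-behaved VMM creates at most soft_max vcpus and treats anything
 * up to hard_max as allowed but unsupported.
 */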
2717
2718long kvm_arch_dev_ioctl(struct file *filp,
2719 unsigned int ioctl, unsigned long arg)
2720{
2721 void __user *argp = (void __user *)arg;
2722 long r;
2723
2724 switch (ioctl) {
2725 case KVM_GET_MSR_INDEX_LIST: {
2726 struct kvm_msr_list __user *user_msr_list = argp;
2727 struct kvm_msr_list msr_list;
2728 unsigned n;
2729
2730 r = -EFAULT;
2731 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2732 goto out;
2733 n = msr_list.nmsrs;
2734 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2735 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2736 goto out;
2737 r = -E2BIG;
2738 if (n < msr_list.nmsrs)
2739 goto out;
2740 r = -EFAULT;
2741 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2742 num_msrs_to_save * sizeof(u32)))
2743 goto out;
2744 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2745 &emulated_msrs,
2746 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2747 goto out;
2748 r = 0;
2749 break;
2750 }
2751 case KVM_GET_SUPPORTED_CPUID:
2752 case KVM_GET_EMULATED_CPUID: {
2753 struct kvm_cpuid2 __user *cpuid_arg = argp;
2754 struct kvm_cpuid2 cpuid;
2755
2756 r = -EFAULT;
2757 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2758 goto out;
2759
2760 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2761 ioctl);
2762 if (r)
2763 goto out;
2764
2765 r = -EFAULT;
2766 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2767 goto out;
2768 r = 0;
2769 break;
2770 }
2771 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2772 u64 mce_cap;
2773
2774 mce_cap = KVM_MCE_CAP_SUPPORTED;
2775 r = -EFAULT;
2776 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2777 goto out;
2778 r = 0;
2779 break;
2780 }
2781 default:
2782 r = -EINVAL;
2783 }
2784out:
2785 return r;
2786}
2787
2788static void wbinvd_ipi(void *garbage)
2789{
2790 wbinvd();
2791}
2792
2793static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2794{
2795 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2796}
2797
2798void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2799{
2800 /* Address WBINVD may be executed by guest */
2801 if (need_emulate_wbinvd(vcpu)) {
2802 if (kvm_x86_ops->has_wbinvd_exit())
2803 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2804 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2805 smp_call_function_single(vcpu->cpu,
2806 wbinvd_ipi, NULL, 1);
2807 }
2808
2809 kvm_x86_ops->vcpu_load(vcpu, cpu);
2810
2811 /* Apply any externally detected TSC adjustments (due to suspend) */
2812 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2813 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2814 vcpu->arch.tsc_offset_adjustment = 0;
2815 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2816 }
2817
2818 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2819 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2820 native_read_tsc() - vcpu->arch.last_host_tsc;
2821 if (tsc_delta < 0)
2822 mark_tsc_unstable("KVM discovered backwards TSC");
2823 if (check_tsc_unstable()) {
2824 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2825 vcpu->arch.last_guest_tsc);
2826 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2827 vcpu->arch.tsc_catchup = 1;
2828 }
2829 /*
2830 * On a host with synchronized TSC, there is no need to update
2831 * kvmclock on vcpu->cpu migration
2832 */
2833 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2834 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2835 if (vcpu->cpu != cpu)
2836 kvm_migrate_timers(vcpu);
2837 vcpu->cpu = cpu;
2838 }
2839
2840 accumulate_steal_time(vcpu);
2841 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2842}
2843
2844void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2845{
2846 kvm_x86_ops->vcpu_put(vcpu);
2847 kvm_put_guest_fpu(vcpu);
2848 vcpu->arch.last_host_tsc = native_read_tsc();
2849}
2850
2851static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2852 struct kvm_lapic_state *s)
2853{
2854 kvm_x86_ops->sync_pir_to_irr(vcpu);
2855 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2856
2857 return 0;
2858}
2859
2860static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2861 struct kvm_lapic_state *s)
2862{
2863 kvm_apic_post_state_restore(vcpu, s);
2864 update_cr8_intercept(vcpu);
2865
2866 return 0;
2867}
2868
2869static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2870 struct kvm_interrupt *irq)
2871{
2872 if (irq->irq >= KVM_NR_INTERRUPTS)
2873 return -EINVAL;
2874 if (irqchip_in_kernel(vcpu->kvm))
2875 return -ENXIO;
2876
2877 kvm_queue_interrupt(vcpu, irq->irq, false);
2878 kvm_make_request(KVM_REQ_EVENT, vcpu);
2879
2880 return 0;
2881}
2882
2883static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2884{
2885 kvm_inject_nmi(vcpu);
2886
2887 return 0;
2888}
2889
2890static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2891 struct kvm_tpr_access_ctl *tac)
2892{
2893 if (tac->flags)
2894 return -EINVAL;
2895 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2896 return 0;
2897}
2898
2899static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2900 u64 mcg_cap)
2901{
2902 int r;
2903 unsigned bank_num = mcg_cap & 0xff, bank;
2904
2905 r = -EINVAL;
2906 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2907 goto out;
2908 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2909 goto out;
2910 r = 0;
2911 vcpu->arch.mcg_cap = mcg_cap;
2912 /* Init IA32_MCG_CTL to all 1s */
2913 if (mcg_cap & MCG_CTL_P)
2914 vcpu->arch.mcg_ctl = ~(u64)0;
2915 /* Init IA32_MCi_CTL to all 1s */
2916 for (bank = 0; bank < bank_num; bank++)
2917 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2918out:
2919 return r;
2920}
2921
2922static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2923 struct kvm_x86_mce *mce)
2924{
2925 u64 mcg_cap = vcpu->arch.mcg_cap;
2926 unsigned bank_num = mcg_cap & 0xff;
2927 u64 *banks = vcpu->arch.mce_banks;
2928
2929 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2930 return -EINVAL;
2931 /*
2932 * if IA32_MCG_CTL is not all 1s, the uncorrected error
2933 * reporting is disabled
2934 */
2935 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2936 vcpu->arch.mcg_ctl != ~(u64)0)
2937 return 0;
2938 banks += 4 * mce->bank;
2939 /*
2940 * if IA32_MCi_CTL is not all 1s, the uncorrected error
2941 * reporting is disabled for the bank
2942 */
2943 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2944 return 0;
2945 if (mce->status & MCI_STATUS_UC) {
2946 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2947 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2948 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2949 return 0;
2950 }
2951 if (banks[1] & MCI_STATUS_VAL)
2952 mce->status |= MCI_STATUS_OVER;
2953 banks[2] = mce->addr;
2954 banks[3] = mce->misc;
2955 vcpu->arch.mcg_status = mce->mcg_status;
2956 banks[1] = mce->status;
2957 kvm_queue_exception(vcpu, MC_VECTOR);
2958 } else if (!(banks[1] & MCI_STATUS_VAL)
2959 || !(banks[1] & MCI_STATUS_UC)) {
2960 if (banks[1] & MCI_STATUS_VAL)
2961 mce->status |= MCI_STATUS_OVER;
2962 banks[2] = mce->addr;
2963 banks[3] = mce->misc;
2964 banks[1] = mce->status;
2965 } else
2966 banks[1] |= MCI_STATUS_OVER;
2967 return 0;
2968}
2969
2970static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2971 struct kvm_vcpu_events *events)
2972{
2973 process_nmi(vcpu);
2974 events->exception.injected =
2975 vcpu->arch.exception.pending &&
2976 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2977 events->exception.nr = vcpu->arch.exception.nr;
2978 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2979 events->exception.pad = 0;
2980 events->exception.error_code = vcpu->arch.exception.error_code;
2981
2982 events->interrupt.injected =
2983 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2984 events->interrupt.nr = vcpu->arch.interrupt.nr;
2985 events->interrupt.soft = 0;
2986 events->interrupt.shadow =
2987 kvm_x86_ops->get_interrupt_shadow(vcpu,
2988 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2989
2990 events->nmi.injected = vcpu->arch.nmi_injected;
2991 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2992 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2993 events->nmi.pad = 0;
2994
2995 events->sipi_vector = 0;
2996
2997 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2998 | KVM_VCPUEVENT_VALID_SHADOW);
2999 memset(&events->reserved, 0, sizeof(events->reserved));
3000}
3001
3002static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3003 struct kvm_vcpu_events *events)
3004{
3005 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3006 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3007 | KVM_VCPUEVENT_VALID_SHADOW))
3008 return -EINVAL;
3009
3010 process_nmi(vcpu);
3011 vcpu->arch.exception.pending = events->exception.injected;
3012 vcpu->arch.exception.nr = events->exception.nr;
3013 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3014 vcpu->arch.exception.error_code = events->exception.error_code;
3015
3016 vcpu->arch.interrupt.pending = events->interrupt.injected;
3017 vcpu->arch.interrupt.nr = events->interrupt.nr;
3018 vcpu->arch.interrupt.soft = events->interrupt.soft;
3019 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3020 kvm_x86_ops->set_interrupt_shadow(vcpu,
3021 events->interrupt.shadow);
3022
3023 vcpu->arch.nmi_injected = events->nmi.injected;
3024 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3025 vcpu->arch.nmi_pending = events->nmi.pending;
3026 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3027
3028 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3029 kvm_vcpu_has_lapic(vcpu))
3030 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3031
3032 kvm_make_request(KVM_REQ_EVENT, vcpu);
3033
3034 return 0;
3035}
3036
3037static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3038 struct kvm_debugregs *dbgregs)
3039{
3040 unsigned long val;
3041
3042 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3043 _kvm_get_dr(vcpu, 6, &val);
3044 dbgregs->dr6 = val;
3045 dbgregs->dr7 = vcpu->arch.dr7;
3046 dbgregs->flags = 0;
3047 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3048}
3049
3050static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3051 struct kvm_debugregs *dbgregs)
3052{
3053 if (dbgregs->flags)
3054 return -EINVAL;
3055
3056 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3057 vcpu->arch.dr6 = dbgregs->dr6;
3058 kvm_update_dr6(vcpu);
3059 vcpu->arch.dr7 = dbgregs->dr7;
3060 kvm_update_dr7(vcpu);
3061
3062 return 0;
3063}
3064
3065static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3066 struct kvm_xsave *guest_xsave)
3067{
3068 if (cpu_has_xsave) {
3069 memcpy(guest_xsave->region,
3070 &vcpu->arch.guest_fpu.state->xsave,
3071 vcpu->arch.guest_xstate_size);
3072 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
3073 vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
3074 } else {
3075 memcpy(guest_xsave->region,
3076 &vcpu->arch.guest_fpu.state->fxsave,
3077 sizeof(struct i387_fxsave_struct));
3078 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3079 XSTATE_FPSSE;
3080 }
3081}
3082
3083static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3084 struct kvm_xsave *guest_xsave)
3085{
3086 u64 xstate_bv =
3087 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3088
3089 if (cpu_has_xsave) {
3090 /*
3091 * Here we allow setting states that are not present in
3092 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
3093 * with old userspace.
3094 */
3095 if (xstate_bv & ~kvm_supported_xcr0())
3096 return -EINVAL;
3097 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3098 guest_xsave->region, vcpu->arch.guest_xstate_size);
3099 } else {
3100 if (xstate_bv & ~XSTATE_FPSSE)
3101 return -EINVAL;
3102 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
3103 guest_xsave->region, sizeof(struct i387_fxsave_struct));
3104 }
3105 return 0;
3106}
3107
3108static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3109 struct kvm_xcrs *guest_xcrs)
3110{
3111 if (!cpu_has_xsave) {
3112 guest_xcrs->nr_xcrs = 0;
3113 return;
3114 }
3115
3116 guest_xcrs->nr_xcrs = 1;
3117 guest_xcrs->flags = 0;
3118 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3119 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3120}
3121
3122static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3123 struct kvm_xcrs *guest_xcrs)
3124{
3125 int i, r = 0;
3126
3127 if (!cpu_has_xsave)
3128 return -EINVAL;
3129
3130 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3131 return -EINVAL;
3132
3133 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3134 /* Only support XCR0 currently */
3135 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3136 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3137 guest_xcrs->xcrs[i].value);
3138 break;
3139 }
3140 if (r)
3141 r = -EINVAL;
3142 return r;
3143}
3144
3145/*
3146 * kvm_set_guest_paused() indicates to the guest kernel that it has been
3147 * stopped by the hypervisor.  This function will be called from the host only.
3148 * EINVAL is returned when the host attempts to set the flag for a guest that
3149 * does not support pv clocks.
3150 */
3151static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3152{
3153 if (!vcpu->arch.pv_time_enabled)
3154 return -EINVAL;
3155 vcpu->arch.pvclock_set_guest_stopped_request = true;
3156 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3157 return 0;
3158}
3159
3160long kvm_arch_vcpu_ioctl(struct file *filp,
3161 unsigned int ioctl, unsigned long arg)
3162{
3163 struct kvm_vcpu *vcpu = filp->private_data;
3164 void __user *argp = (void __user *)arg;
3165 int r;
3166 union {
3167 struct kvm_lapic_state *lapic;
3168 struct kvm_xsave *xsave;
3169 struct kvm_xcrs *xcrs;
3170 void *buffer;
3171 } u;
3172
3173 u.buffer = NULL;
3174 switch (ioctl) {
3175 case KVM_GET_LAPIC: {
3176 r = -EINVAL;
3177 if (!vcpu->arch.apic)
3178 goto out;
3179 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3180
3181 r = -ENOMEM;
3182 if (!u.lapic)
3183 goto out;
3184 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3185 if (r)
3186 goto out;
3187 r = -EFAULT;
3188 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3189 goto out;
3190 r = 0;
3191 break;
3192 }
3193 case KVM_SET_LAPIC: {
3194 r = -EINVAL;
3195 if (!vcpu->arch.apic)
3196 goto out;
3197 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3198 if (IS_ERR(u.lapic))
3199 return PTR_ERR(u.lapic);
3200
3201 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3202 break;
3203 }
3204 case KVM_INTERRUPT: {
3205 struct kvm_interrupt irq;
3206
3207 r = -EFAULT;
3208 if (copy_from_user(&irq, argp, sizeof irq))
3209 goto out;
3210 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3211 break;
3212 }
3213 case KVM_NMI: {
3214 r = kvm_vcpu_ioctl_nmi(vcpu);
3215 break;
3216 }
3217 case KVM_SET_CPUID: {
3218 struct kvm_cpuid __user *cpuid_arg = argp;
3219 struct kvm_cpuid cpuid;
3220
3221 r = -EFAULT;
3222 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3223 goto out;
3224 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3225 break;
3226 }
3227 case KVM_SET_CPUID2: {
3228 struct kvm_cpuid2 __user *cpuid_arg = argp;
3229 struct kvm_cpuid2 cpuid;
3230
3231 r = -EFAULT;
3232 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3233 goto out;
3234 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3235 cpuid_arg->entries);
3236 break;
3237 }
3238 case KVM_GET_CPUID2: {
3239 struct kvm_cpuid2 __user *cpuid_arg = argp;
3240 struct kvm_cpuid2 cpuid;
3241
3242 r = -EFAULT;
3243 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3244 goto out;
3245 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3246 cpuid_arg->entries);
3247 if (r)
3248 goto out;
3249 r = -EFAULT;
3250 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3251 goto out;
3252 r = 0;
3253 break;
3254 }
3255 case KVM_GET_MSRS:
3256 r = msr_io(vcpu, argp, kvm_get_msr, 1);
3257 break;
3258 case KVM_SET_MSRS:
3259 r = msr_io(vcpu, argp, do_set_msr, 0);
3260 break;
3261 case KVM_TPR_ACCESS_REPORTING: {
3262 struct kvm_tpr_access_ctl tac;
3263
3264 r = -EFAULT;
3265 if (copy_from_user(&tac, argp, sizeof tac))
3266 goto out;
3267 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3268 if (r)
3269 goto out;
3270 r = -EFAULT;
3271 if (copy_to_user(argp, &tac, sizeof tac))
3272 goto out;
3273 r = 0;
3274 break;
3275 }
3276 case KVM_SET_VAPIC_ADDR: {
3277 struct kvm_vapic_addr va;
3278
3279 r = -EINVAL;
3280 if (!irqchip_in_kernel(vcpu->kvm))
3281 goto out;
3282 r = -EFAULT;
3283 if (copy_from_user(&va, argp, sizeof va))
3284 goto out;
3285 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3286 break;
3287 }
3288 case KVM_X86_SETUP_MCE: {
3289 u64 mcg_cap;
3290
3291 r = -EFAULT;
3292 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3293 goto out;
3294 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3295 break;
3296 }
3297 case KVM_X86_SET_MCE: {
3298 struct kvm_x86_mce mce;
3299
3300 r = -EFAULT;
3301 if (copy_from_user(&mce, argp, sizeof mce))
3302 goto out;
3303 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3304 break;
3305 }
3306 case KVM_GET_VCPU_EVENTS: {
3307 struct kvm_vcpu_events events;
3308
3309 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3310
3311 r = -EFAULT;
3312 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3313 break;
3314 r = 0;
3315 break;
3316 }
3317 case KVM_SET_VCPU_EVENTS: {
3318 struct kvm_vcpu_events events;
3319
3320 r = -EFAULT;
3321 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3322 break;
3323
3324 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3325 break;
3326 }
3327 case KVM_GET_DEBUGREGS: {
3328 struct kvm_debugregs dbgregs;
3329
3330 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3331
3332 r = -EFAULT;
3333 if (copy_to_user(argp, &dbgregs,
3334 sizeof(struct kvm_debugregs)))
3335 break;
3336 r = 0;
3337 break;
3338 }
3339 case KVM_SET_DEBUGREGS: {
3340 struct kvm_debugregs dbgregs;
3341
3342 r = -EFAULT;
3343 if (copy_from_user(&dbgregs, argp,
3344 sizeof(struct kvm_debugregs)))
3345 break;
3346
3347 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3348 break;
3349 }
3350 case KVM_GET_XSAVE: {
3351 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3352 r = -ENOMEM;
3353 if (!u.xsave)
3354 break;
3355
3356 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3357
3358 r = -EFAULT;
3359 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3360 break;
3361 r = 0;
3362 break;
3363 }
3364 case KVM_SET_XSAVE: {
3365 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3366 if (IS_ERR(u.xsave))
3367 return PTR_ERR(u.xsave);
3368
3369 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3370 break;
3371 }
3372 case KVM_GET_XCRS: {
3373 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3374 r = -ENOMEM;
3375 if (!u.xcrs)
3376 break;
3377
3378 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3379
3380 r = -EFAULT;
3381 if (copy_to_user(argp, u.xcrs,
3382 sizeof(struct kvm_xcrs)))
3383 break;
3384 r = 0;
3385 break;
3386 }
3387 case KVM_SET_XCRS: {
3388 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3389 if (IS_ERR(u.xcrs))
3390 return PTR_ERR(u.xcrs);
3391
3392 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3393 break;
3394 }
3395 case KVM_SET_TSC_KHZ: {
3396 u32 user_tsc_khz;
3397
3398 r = -EINVAL;
3399 user_tsc_khz = (u32)arg;
3400
3401 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3402 goto out;
3403
3404 if (user_tsc_khz == 0)
3405 user_tsc_khz = tsc_khz;
3406
3407 kvm_set_tsc_khz(vcpu, user_tsc_khz);
3408
3409 r = 0;
3410 goto out;
3411 }
3412 case KVM_GET_TSC_KHZ: {
3413 r = vcpu->arch.virtual_tsc_khz;
3414 goto out;
3415 }
3416 case KVM_KVMCLOCK_CTRL: {
3417 r = kvm_set_guest_paused(vcpu);
3418 goto out;
3419 }
3420 default:
3421 r = -EINVAL;
3422 }
3423out:
3424 kfree(u.buffer);
3425 return r;
3426}
3427
3428int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3429{
3430 return VM_FAULT_SIGBUS;
3431}
3432
3433static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3434{
3435 int ret;
3436
3437 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3438 return -EINVAL;
3439 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3440 return ret;
3441}
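
/*
 * Editor's note (illustrative, not part of the original source): the
 * guard above rejects addresses in the last three pages of the 32-bit
 * address space, because the TSS used for real-mode emulation spans
 * three consecutive pages starting at addr and must not wrap.
 */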
3442
3443static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3444 u64 ident_addr)
3445{
3446 kvm->arch.ept_identity_map_addr = ident_addr;
3447 return 0;
3448}
3449
3450static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3451 u32 kvm_nr_mmu_pages)
3452{
3453 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3454 return -EINVAL;
3455
3456 mutex_lock(&kvm->slots_lock);
3457
3458 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3459 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3460
3461 mutex_unlock(&kvm->slots_lock);
3462 return 0;
3463}
3464
3465static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3466{
3467 return kvm->arch.n_max_mmu_pages;
3468}
3469
3470static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3471{
3472 int r;
3473
3474 r = 0;
3475 switch (chip->chip_id) {
3476 case KVM_IRQCHIP_PIC_MASTER:
3477 memcpy(&chip->chip.pic,
3478 &pic_irqchip(kvm)->pics[0],
3479 sizeof(struct kvm_pic_state));
3480 break;
3481 case KVM_IRQCHIP_PIC_SLAVE:
3482 memcpy(&chip->chip.pic,
3483 &pic_irqchip(kvm)->pics[1],
3484 sizeof(struct kvm_pic_state));
3485 break;
3486 case KVM_IRQCHIP_IOAPIC:
3487 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3488 break;
3489 default:
3490 r = -EINVAL;
3491 break;
3492 }
3493 return r;
3494}
3495
3496static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3497{
3498 int r;
3499
3500 r = 0;
3501 switch (chip->chip_id) {
3502 case KVM_IRQCHIP_PIC_MASTER:
3503 spin_lock(&pic_irqchip(kvm)->lock);
3504 memcpy(&pic_irqchip(kvm)->pics[0],
3505 &chip->chip.pic,
3506 sizeof(struct kvm_pic_state));
3507 spin_unlock(&pic_irqchip(kvm)->lock);
3508 break;
3509 case KVM_IRQCHIP_PIC_SLAVE:
3510 spin_lock(&pic_irqchip(kvm)->lock);
3511 memcpy(&pic_irqchip(kvm)->pics[1],
3512 &chip->chip.pic,
3513 sizeof(struct kvm_pic_state));
3514 spin_unlock(&pic_irqchip(kvm)->lock);
3515 break;
3516 case KVM_IRQCHIP_IOAPIC:
3517 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3518 break;
3519 default:
3520 r = -EINVAL;
3521 break;
3522 }
3523 kvm_pic_update_irq(pic_irqchip(kvm));
3524 return r;
3525}
3526
3527static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3528{
3529 int r = 0;
3530
3531 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3532 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3533 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3534 return r;
3535}
3536
3537static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3538{
3539 int r = 0;
3540
3541 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3542 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3543 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3544 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3545 return r;
3546}
3547
3548static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3549{
3550 int r = 0;
3551
3552 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3553 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3554 sizeof(ps->channels));
3555 ps->flags = kvm->arch.vpit->pit_state.flags;
3556 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3557 memset(&ps->reserved, 0, sizeof(ps->reserved));
3558 return r;
3559}
3560
3561static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3562{
3563 int r = 0, start = 0;
3564 u32 prev_legacy, cur_legacy;
3565 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3566 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3567 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3568 if (!prev_legacy && cur_legacy)
3569 start = 1;
3570 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3571 sizeof(kvm->arch.vpit->pit_state.channels));
3572 kvm->arch.vpit->pit_state.flags = ps->flags;
3573 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3574 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3575 return r;
3576}
3577
3578static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3579 struct kvm_reinject_control *control)
3580{
3581 if (!kvm->arch.vpit)
3582 return -ENXIO;
3583 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3584 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3585 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3586 return 0;
3587}
3588
3589/**
3590 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
3591 * @kvm: kvm instance
3592 * @log: slot id and address to which we copy the log
3593 *
3594 * We need to keep it in mind that VCPU threads can write to the bitmap
3595 * concurrently.  So, to avoid losing data, we keep the following order for
3596 * each bit:
3597 *
3598 *   1. Take a snapshot of the bit and clear it if needed.
3599 *   2. Write protect the corresponding page.
3600 *   3. Flush TLB's if needed.
3601 *   4. Copy the snapshot to the userspace.
3602 *
3603 * Between 2 and 3, the guest may write to the page using the remaining TLB
3604 * entry.  This is not a problem because the page will be reported dirty at
3605 * step 4 using the snapshot taken before and step 3 ensures that writes done
3606 * after exiting to userspace will be logged for the next call.
3607 */
3608int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3609{
3610 int r;
3611 struct kvm_memory_slot *memslot;
3612 unsigned long n, i;
3613 unsigned long *dirty_bitmap;
3614 unsigned long *dirty_bitmap_buffer;
3615 bool is_dirty = false;
3616
3617 mutex_lock(&kvm->slots_lock);
3618
3619 r = -EINVAL;
3620 if (log->slot >= KVM_USER_MEM_SLOTS)
3621 goto out;
3622
3623 memslot = id_to_memslot(kvm->memslots, log->slot);
3624
3625 dirty_bitmap = memslot->dirty_bitmap;
3626 r = -ENOENT;
3627 if (!dirty_bitmap)
3628 goto out;
3629
3630 n = kvm_dirty_bitmap_bytes(memslot);
3631
3632 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3633 memset(dirty_bitmap_buffer, 0, n);
3634
3635 spin_lock(&kvm->mmu_lock);
3636
3637 for (i = 0; i < n / sizeof(long); i++) {
3638 unsigned long mask;
3639 gfn_t offset;
3640
3641 if (!dirty_bitmap[i])
3642 continue;
3643
3644 is_dirty = true;
3645
3646 mask = xchg(&dirty_bitmap[i], 0);
3647 dirty_bitmap_buffer[i] = mask;
3648
3649 offset = i * BITS_PER_LONG;
3650 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3651 }
3652 if (is_dirty)
3653 kvm_flush_remote_tlbs(kvm);
3654
3655 spin_unlock(&kvm->mmu_lock);
3656
3657 r = -EFAULT;
3658 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3659 goto out;
3660
3661 r = 0;
3662out:
3663 mutex_unlock(&kvm->slots_lock);
3664 return r;
3665}
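
/*
 * Editor's note (illustrative, not part of the original source): in the
 * bitmap copied out above, bit i corresponds to gfn memslot->base_gfn + i;
 * a set bit at word w, bit b therefore reports that page
 * base_gfn + w * BITS_PER_LONG + b was written since the previous call.
 */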
3666
3667int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3668 bool line_status)
3669{
3670 if (!irqchip_in_kernel(kvm))
3671 return -ENXIO;
3672
3673 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3674 irq_event->irq, irq_event->level,
3675 line_status);
3676 return 0;
3677}
3678
3679long kvm_arch_vm_ioctl(struct file *filp,
3680 unsigned int ioctl, unsigned long arg)
3681{
3682 struct kvm *kvm = filp->private_data;
3683 void __user *argp = (void __user *)arg;
3684 int r = -ENOTTY;
3685
3686 /*
3687 * This union makes it completely explicit to gcc-3.x that these
3688 * two variables' stack usage should be combined, not added together.
3689 */
3690 union {
3691 struct kvm_pit_state ps;
3692 struct kvm_pit_state2 ps2;
3693 struct kvm_pit_config pit_config;
3694 } u;
3695
3696 switch (ioctl) {
3697 case KVM_SET_TSS_ADDR:
3698 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3699 break;
3700 case KVM_SET_IDENTITY_MAP_ADDR: {
3701 u64 ident_addr;
3702
3703 r = -EFAULT;
3704 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3705 goto out;
3706 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3707 break;
3708 }
3709 case KVM_SET_NR_MMU_PAGES:
3710 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3711 break;
3712 case KVM_GET_NR_MMU_PAGES:
3713 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3714 break;
3715 case KVM_CREATE_IRQCHIP: {
3716 struct kvm_pic *vpic;
3717
3718 mutex_lock(&kvm->lock);
3719 r = -EEXIST;
3720 if (kvm->arch.vpic)
3721 goto create_irqchip_unlock;
3722 r = -EINVAL;
3723 if (atomic_read(&kvm->online_vcpus))
3724 goto create_irqchip_unlock;
3725 r = -ENOMEM;
3726 vpic = kvm_create_pic(kvm);
3727 if (vpic) {
3728 r = kvm_ioapic_init(kvm);
3729 if (r) {
3730 mutex_lock(&kvm->slots_lock);
3731 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3732 &vpic->dev_master);
3733 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3734 &vpic->dev_slave);
3735 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3736 &vpic->dev_eclr);
3737 mutex_unlock(&kvm->slots_lock);
3738 kfree(vpic);
3739 goto create_irqchip_unlock;
3740 }
3741 } else
3742 goto create_irqchip_unlock;
3743 smp_wmb();
3744 kvm->arch.vpic = vpic;
3745 smp_wmb();
3746 r = kvm_setup_default_irq_routing(kvm);
3747 if (r) {
3748 mutex_lock(&kvm->slots_lock);
3749 mutex_lock(&kvm->irq_lock);
3750 kvm_ioapic_destroy(kvm);
3751 kvm_destroy_pic(kvm);
3752 mutex_unlock(&kvm->irq_lock);
3753 mutex_unlock(&kvm->slots_lock);
3754 }
3755 create_irqchip_unlock:
3756 mutex_unlock(&kvm->lock);
3757 break;
3758 }
3759 case KVM_CREATE_PIT:
3760 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3761 goto create_pit;
3762 case KVM_CREATE_PIT2:
3763 r = -EFAULT;
3764 if (copy_from_user(&u.pit_config, argp,
3765 sizeof(struct kvm_pit_config)))
3766 goto out;
3767 create_pit:
3768 mutex_lock(&kvm->slots_lock);
3769 r = -EEXIST;
3770 if (kvm->arch.vpit)
3771 goto create_pit_unlock;
3772 r = -ENOMEM;
3773 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3774 if (kvm->arch.vpit)
3775 r = 0;
3776 create_pit_unlock:
3777 mutex_unlock(&kvm->slots_lock);
3778 break;
3779 case KVM_GET_IRQCHIP: {
3780 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3781 struct kvm_irqchip *chip;
3782
3783 chip = memdup_user(argp, sizeof(*chip));
3784 if (IS_ERR(chip)) {
3785 r = PTR_ERR(chip);
3786 goto out;
3787 }
3788
3789 r = -ENXIO;
3790 if (!irqchip_in_kernel(kvm))
3791 goto get_irqchip_out;
3792 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3793 if (r)
3794 goto get_irqchip_out;
3795 r = -EFAULT;
3796 if (copy_to_user(argp, chip, sizeof *chip))
3797 goto get_irqchip_out;
3798 r = 0;
3799 get_irqchip_out:
3800 kfree(chip);
3801 break;
3802 }
3803 case KVM_SET_IRQCHIP: {
3804 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3805 struct kvm_irqchip *chip;
3806
3807 chip = memdup_user(argp, sizeof(*chip));
3808 if (IS_ERR(chip)) {
3809 r = PTR_ERR(chip);
3810 goto out;
3811 }
3812
3813 r = -ENXIO;
3814 if (!irqchip_in_kernel(kvm))
3815 goto set_irqchip_out;
3816 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3817 if (r)
3818 goto set_irqchip_out;
3819 r = 0;
3820 set_irqchip_out:
3821 kfree(chip);
3822 break;
3823 }
3824 case KVM_GET_PIT: {
3825 r = -EFAULT;
3826 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3827 goto out;
3828 r = -ENXIO;
3829 if (!kvm->arch.vpit)
3830 goto out;
3831 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3832 if (r)
3833 goto out;
3834 r = -EFAULT;
3835 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3836 goto out;
3837 r = 0;
3838 break;
3839 }
3840 case KVM_SET_PIT: {
3841 r = -EFAULT;
3842 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3843 goto out;
3844 r = -ENXIO;
3845 if (!kvm->arch.vpit)
3846 goto out;
3847 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3848 break;
3849 }
3850 case KVM_GET_PIT2: {
3851 r = -ENXIO;
3852 if (!kvm->arch.vpit)
3853 goto out;
3854 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3855 if (r)
3856 goto out;
3857 r = -EFAULT;
3858 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3859 goto out;
3860 r = 0;
3861 break;
3862 }
3863 case KVM_SET_PIT2: {
3864 r = -EFAULT;
3865 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3866 goto out;
3867 r = -ENXIO;
3868 if (!kvm->arch.vpit)
3869 goto out;
3870 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3871 break;
3872 }
3873 case KVM_REINJECT_CONTROL: {
3874 struct kvm_reinject_control control;
3875 r = -EFAULT;
3876 if (copy_from_user(&control, argp, sizeof(control)))
3877 goto out;
3878 r = kvm_vm_ioctl_reinject(kvm, &control);
3879 break;
3880 }
3881 case KVM_XEN_HVM_CONFIG: {
3882 r = -EFAULT;
3883 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3884 sizeof(struct kvm_xen_hvm_config)))
3885 goto out;
3886 r = -EINVAL;
3887 if (kvm->arch.xen_hvm_config.flags)
3888 goto out;
3889 r = 0;
3890 break;
3891 }
3892 case KVM_SET_CLOCK: {
3893 struct kvm_clock_data user_ns;
3894 u64 now_ns;
3895 s64 delta;
3896
3897 r = -EFAULT;
3898 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3899 goto out;
3900
3901 r = -EINVAL;
3902 if (user_ns.flags)
3903 goto out;
3904
3905 r = 0;
3906 local_irq_disable();
3907 now_ns = get_kernel_ns();
3908 delta = user_ns.clock - now_ns;
3909 local_irq_enable();
3910 kvm->arch.kvmclock_offset = delta;
3911 kvm_gen_update_masterclock(kvm);
3912 break;
3913 }
3914 case KVM_GET_CLOCK: {
3915 struct kvm_clock_data user_ns;
3916 u64 now_ns;
3917
3918 local_irq_disable();
3919 now_ns = get_kernel_ns();
3920 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3921 local_irq_enable();
3922 user_ns.flags = 0;
3923 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3924
3925 r = -EFAULT;
3926 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3927 goto out;
3928 r = 0;
3929 break;
3930 }
3931
3932 default:
3933 ;
3934 }
3935out:
3936 return r;
3937}
3938
3939static void kvm_init_msr_list(void)
3940{
3941 u32 dummy[2];
3942 unsigned i, j;
3943
3944 /* skip the first msrs in the list. KVM-specific */
3945 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3946 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3947 continue;
3948
3949 /*
3950 * Even MSRs that are valid in the host may not be exposed to the
3951 * guests in some cases.  Feature-specific MSRs are filtered here
3952 * as well: MSR_IA32_BNDCFGS, for example, is only kept in the
3953 * list when the backend reports MPX support, because the MSR is
3954 * meaningless without it.
3955 */
3956 switch (msrs_to_save[i]) {
3957 case MSR_IA32_BNDCFGS:
3958 if (!kvm_x86_ops->mpx_supported())
3959 continue;
3960 break;
3961 default:
3962 break;
3963 }
3964
3965 if (j < i)
3966 msrs_to_save[j] = msrs_to_save[i];
3967 j++;
3968 }
3969 num_msrs_to_save = j;
3970}
3971
3972static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3973 const void *v)
3974{
3975 int handled = 0;
3976 int n;
3977
3978 do {
3979 n = min(len, 8);
3980 if (!(vcpu->arch.apic &&
3981 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3982 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3983 break;
3984 handled += n;
3985 addr += n;
3986 len -= n;
3987 v += n;
3988 } while (len);
3989
3990 return handled;
3991}
3992
3993static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3994{
3995 int handled = 0;
3996 int n;
3997
3998 do {
3999 n = min(len, 8);
4000 if (!(vcpu->arch.apic &&
4001 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
4002 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
4003 break;
4004 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
4005 handled += n;
4006 addr += n;
4007 len -= n;
4008 v += n;
4009 } while (len);
4010
4011 return handled;
4012}
4013
4014static void kvm_set_segment(struct kvm_vcpu *vcpu,
4015 struct kvm_segment *var, int seg)
4016{
4017 kvm_x86_ops->set_segment(vcpu, var, seg);
4018}
4019
4020void kvm_get_segment(struct kvm_vcpu *vcpu,
4021 struct kvm_segment *var, int seg)
4022{
4023 kvm_x86_ops->get_segment(vcpu, var, seg);
4024}
4025
4026gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
4027{
4028 gpa_t t_gpa;
4029 struct x86_exception exception;
4030
4031 BUG_ON(!mmu_is_nested(vcpu));
4032
4033 /* NPT walks are always user-walks */
4034 access |= PFERR_USER_MASK;
4035 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
4036
4037 return t_gpa;
4038}
4039
4040gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
4041 struct x86_exception *exception)
4042{
4043 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4044 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4045}
4046
4047gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
4048 struct x86_exception *exception)
4049{
4050 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4051 access |= PFERR_FETCH_MASK;
4052 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4053}
4054
4055gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
4056 struct x86_exception *exception)
4057{
4058 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4059 access |= PFERR_WRITE_MASK;
4060 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4061}
4062
4063/* uses this to access any guest's mapped memory without checking CPL */
4064gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
4065 struct x86_exception *exception)
4066{
4067 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
4068}
4069
4070static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4071 struct kvm_vcpu *vcpu, u32 access,
4072 struct x86_exception *exception)
4073{
4074 void *data = val;
4075 int r = X86EMUL_CONTINUE;
4076
4077 while (bytes) {
4078 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
4079 exception);
4080 unsigned offset = addr & (PAGE_SIZE-1);
4081 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
4082 int ret;
4083
4084 if (gpa == UNMAPPED_GVA)
4085 return X86EMUL_PROPAGATE_FAULT;
4086 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
4087 if (ret < 0) {
4088 r = X86EMUL_IO_NEEDED;
4089 goto out;
4090 }
4091
4092 bytes -= toread;
4093 data += toread;
4094 addr += toread;
4095 }
4096out:
4097 return r;
4098}
4099
4100/* used for instruction fetching */
4101static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4102 gva_t addr, void *val, unsigned int bytes,
4103 struct x86_exception *exception)
4104{
4105 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4106 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4107
4108 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
4109 access | PFERR_FETCH_MASK,
4110 exception);
4111}
4112
4113int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
4114 gva_t addr, void *val, unsigned int bytes,
4115 struct x86_exception *exception)
4116{
4117 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4118 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4119
4120 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4121 exception);
4122}
4123EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4124
4125static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4126 gva_t addr, void *val, unsigned int bytes,
4127 struct x86_exception *exception)
4128{
4129 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4130 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4131}
4132
4133int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4134 gva_t addr, void *val,
4135 unsigned int bytes,
4136 struct x86_exception *exception)
4137{
4138 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4139 void *data = val;
4140 int r = X86EMUL_CONTINUE;
4141
4142 while (bytes) {
4143 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4144 PFERR_WRITE_MASK,
4145 exception);
4146 unsigned offset = addr & (PAGE_SIZE-1);
4147 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4148 int ret;
4149
4150 if (gpa == UNMAPPED_GVA)
4151 return X86EMUL_PROPAGATE_FAULT;
4152 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
4153 if (ret < 0) {
4154 r = X86EMUL_IO_NEEDED;
4155 goto out;
4156 }
4157
4158 bytes -= towrite;
4159 data += towrite;
4160 addr += towrite;
4161 }
4162out:
4163 return r;
4164}
4165EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4166
4167static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4168 gpa_t *gpa, struct x86_exception *exception,
4169 bool write)
4170{
4171 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
4172 | (write ? PFERR_WRITE_MASK : 0);
4173
4174 if (vcpu_match_mmio_gva(vcpu, gva)
4175 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
4176 vcpu->arch.access, access)) {
4177 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4178 (gva & (PAGE_SIZE - 1));
4179 trace_vcpu_match_mmio(gva, *gpa, write, false);
4180 return 1;
4181 }
4182
4183 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4184
4185 if (*gpa == UNMAPPED_GVA)
4186 return -1;
4187
4188 /* For APIC access vmexit */
4189 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4190 return 1;
4191
4192 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
4193 trace_vcpu_match_mmio(gva, *gpa, write, true);
4194 return 1;
4195 }
4196
4197 return 0;
4198}
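
/*
 * Editor's note (illustrative, not part of the original source):
 * vcpu_mmio_gva_to_gpa() returns -1 when the translation faults, 1 when
 * the access must be handled as MMIO (cached MMIO match or the APIC
 * window), and 0 when it targets ordinary guest RAM.
 */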
4199
4200int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4201 const void *val, int bytes)
4202{
4203 int ret;
4204
4205 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
4206 if (ret < 0)
4207 return 0;
4208 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
4209 return 1;
4210}
4211
4212struct read_write_emulator_ops {
4213 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4214 int bytes);
4215 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4216 void *val, int bytes);
4217 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4218 int bytes, void *val);
4219 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4220 void *val, int bytes);
4221 bool write;
4222};
4223
4224static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4225{
4226 if (vcpu->mmio_read_completed) {
4227 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4228 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4229 vcpu->mmio_read_completed = 0;
4230 return 1;
4231 }
4232
4233 return 0;
4234}
4235
4236static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4237 void *val, int bytes)
4238{
4239 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
4240}
4241
4242static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4243 void *val, int bytes)
4244{
4245 return emulator_write_phys(vcpu, gpa, val, bytes);
4246}
4247
4248static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4249{
4250 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4251 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4252}
4253
4254static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4255 void *val, int bytes)
4256{
4257 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4258 return X86EMUL_IO_NEEDED;
4259}
4260
4261static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4262 void *val, int bytes)
4263{
4264 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4265
4266 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4267 return X86EMUL_CONTINUE;
4268}
4269
4270static const struct read_write_emulator_ops read_emultor = {
4271 .read_write_prepare = read_prepare,
4272 .read_write_emulate = read_emulate,
4273 .read_write_mmio = vcpu_mmio_read,
4274 .read_write_exit_mmio = read_exit_mmio,
4275};
4276
4277static const struct read_write_emulator_ops write_emultor = {
4278 .read_write_emulate = write_emulate,
4279 .read_write_mmio = write_mmio,
4280 .read_write_exit_mmio = write_exit_mmio,
4281 .write = true,
4282};
4283
4284static int emulator_read_write_onepage(unsigned long addr, void *val,
4285 unsigned int bytes,
4286 struct x86_exception *exception,
4287 struct kvm_vcpu *vcpu,
4288 const struct read_write_emulator_ops *ops)
4289{
4290 gpa_t gpa;
4291 int handled, ret;
4292 bool write = ops->write;
4293 struct kvm_mmio_fragment *frag;
4294
4295 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4296
4297 if (ret < 0)
4298 return X86EMUL_PROPAGATE_FAULT;
4299
4300 /* For APIC access vmexit */
4301 if (ret)
4302 goto mmio;
4303
4304 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
4305 return X86EMUL_CONTINUE;
4306
4307mmio:
4308 /*
4309 * Is this MMIO handled locally?
4310 */
4311 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4312 if (handled == bytes)
4313 return X86EMUL_CONTINUE;
4314
4315 gpa += handled;
4316 bytes -= handled;
4317 val += handled;
4318
4319 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4320 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4321 frag->gpa = gpa;
4322 frag->data = val;
4323 frag->len = bytes;
4324 return X86EMUL_CONTINUE;
4325}
4326
4327int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
4328 void *val, unsigned int bytes,
4329 struct x86_exception *exception,
4330 const struct read_write_emulator_ops *ops)
4331{
4332 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4333 gpa_t gpa;
4334 int rc;
4335
4336 if (ops->read_write_prepare &&
4337 ops->read_write_prepare(vcpu, val, bytes))
4338 return X86EMUL_CONTINUE;
4339
4340 vcpu->mmio_nr_fragments = 0;
4341
4342 /* Crossing a page boundary? */
4343 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4344 int now;
4345
4346 now = -addr & ~PAGE_MASK;
4347 rc = emulator_read_write_onepage(addr, val, now, exception,
4348 vcpu, ops);
4349
4350 if (rc != X86EMUL_CONTINUE)
4351 return rc;
4352 addr += now;
4353 val += now;
4354 bytes -= now;
4355 }
4356
4357 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4358 vcpu, ops);
4359 if (rc != X86EMUL_CONTINUE)
4360 return rc;
4361
4362 if (!vcpu->mmio_nr_fragments)
4363 return rc;
4364
4365 gpa = vcpu->mmio_fragments[0].gpa;
4366
4367 vcpu->mmio_needed = 1;
4368 vcpu->mmio_cur_fragment = 0;
4369
4370 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4371 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4372 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4373 vcpu->run->mmio.phys_addr = gpa;
4374
4375 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4376}
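
/*
 * Editor's note -- worked example for the page-split logic above
 * (illustrative only, not part of the original source): with 4K pages,
 * addr = 0xffd and bytes = 8 give
 *
 *	now = -0xffd & ~PAGE_MASK = 3
 *
 * so the access is emulated as a 3-byte chunk that ends at the page
 * boundary followed by a 5-byte chunk starting at 0x1000, each chunk
 * possibly queueing its own MMIO fragment.
 */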
4377
4378static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4379 unsigned long addr,
4380 void *val,
4381 unsigned int bytes,
4382 struct x86_exception *exception)
4383{
4384 return emulator_read_write(ctxt, addr, val, bytes,
4385 exception, &read_emultor);
4386}
4387
4388int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4389 unsigned long addr,
4390 const void *val,
4391 unsigned int bytes,
4392 struct x86_exception *exception)
4393{
4394 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4395 exception, &write_emultor);
4396}
4397
4398#define CMPXCHG_TYPE(t, ptr, old, new) \
4399 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4400
4401#ifdef CONFIG_X86_64
4402# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4403#else
4404# define CMPXCHG64(ptr, old, new) \
4405 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4406#endif
4407
4408static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4409 unsigned long addr,
4410 const void *old,
4411 const void *new,
4412 unsigned int bytes,
4413 struct x86_exception *exception)
4414{
4415 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4416 gpa_t gpa;
4417 struct page *page;
4418 char *kaddr;
4419 bool exchanged;
4420
4421 /* guests cmpxchg8b have to be emulated atomically */
4422 if (bytes > 8 || (bytes & (bytes - 1)))
4423 goto emul_write;
4424
4425 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4426
4427 if (gpa == UNMAPPED_GVA ||
4428 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4429 goto emul_write;
4430
4431 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4432 goto emul_write;
4433
4434 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4435 if (is_error_page(page))
4436 goto emul_write;
4437
4438 kaddr = kmap_atomic(page);
4439 kaddr += offset_in_page(gpa);
4440 switch (bytes) {
4441 case 1:
4442 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4443 break;
4444 case 2:
4445 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4446 break;
4447 case 4:
4448 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4449 break;
4450 case 8:
4451 exchanged = CMPXCHG64(kaddr, old, new);
4452 break;
4453 default:
4454 BUG();
4455 }
4456 kunmap_atomic(kaddr);
4457 kvm_release_page_dirty(page);
4458
4459 if (!exchanged)
4460 return X86EMUL_CMPXCHG_FAILED;
4461
4462 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
4463 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4464
4465 return X86EMUL_CONTINUE;
4466
4467emul_write:
4468 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4469
4470 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4471}
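
/*
 * Editor's note (illustrative, not part of the original source): the
 * CMPXCHG_TYPE()/CMPXCHG64() helpers above evaluate to true only when
 * the value at kaddr still equalled *old and was replaced by *new,
 * which mirrors the guest-visible success condition of CMPXCHG.
 */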
4472
4473static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4474{
4475 /* TODO: String I/O for in kernel device */
4476 int r;
4477
4478 if (vcpu->arch.pio.in)
4479 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4480 vcpu->arch.pio.size, pd);
4481 else
4482 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4483 vcpu->arch.pio.port, vcpu->arch.pio.size,
4484 pd);
4485 return r;
4486}
4487
4488static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4489 unsigned short port, void *val,
4490 unsigned int count, bool in)
4491{
4492 trace_kvm_pio(!in, port, size, count);
4493
4494 vcpu->arch.pio.port = port;
4495 vcpu->arch.pio.in = in;
4496 vcpu->arch.pio.count = count;
4497 vcpu->arch.pio.size = size;
4498
4499 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4500 vcpu->arch.pio.count = 0;
4501 return 1;
4502 }
4503
4504 vcpu->run->exit_reason = KVM_EXIT_IO;
4505 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4506 vcpu->run->io.size = size;
4507 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4508 vcpu->run->io.count = count;
4509 vcpu->run->io.port = port;
4510
4511 return 0;
4512}
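
/*
 * Editor's note (illustrative, not part of the original source): a
 * return of 1 means kernel_pio() found an in-kernel device for the
 * port, so emulation continues immediately; 0 means vcpu->run has been
 * filled in and the vcpu must exit to userspace with KVM_EXIT_IO.
 */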
4513
4514static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4515 int size, unsigned short port, void *val,
4516 unsigned int count)
4517{
4518 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4519 int ret;
4520
4521 if (vcpu->arch.pio.count)
4522 goto data_avail;
4523
4524 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4525 if (ret) {
4526data_avail:
4527 memcpy(val, vcpu->arch.pio_data, size * count);
4528 vcpu->arch.pio.count = 0;
4529 return 1;
4530 }
4531
4532 return 0;
4533}
4534
4535static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4536 int size, unsigned short port,
4537 const void *val, unsigned int count)
4538{
4539 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4540
4541 memcpy(vcpu->arch.pio_data, val, size * count);
4542 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4543}
4544
4545static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4546{
4547 return kvm_x86_ops->get_segment_base(vcpu, seg);
4548}
4549
4550static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4551{
4552 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4553}
4554
4555int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4556{
4557 if (!need_emulate_wbinvd(vcpu))
4558 return X86EMUL_CONTINUE;
4559
4560 if (kvm_x86_ops->has_wbinvd_exit()) {
4561 int cpu = get_cpu();
4562
4563 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4564 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4565 wbinvd_ipi, NULL, 1);
4566 put_cpu();
4567 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4568 } else
4569 wbinvd();
4570 return X86EMUL_CONTINUE;
4571}
4572EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4573
4574static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4575{
4576 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4577}
4578
4579int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4580{
4581 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4582}
4583
4584int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4585{
4586
4587 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4588}
4589
4590static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4591{
4592 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4593}
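/*
 * Worked example (illustrative): a 32-bit mov to a control register must
 * preserve the upper half of the current 64-bit value.  With
 * curr_cr = 0xffff000000050033 and new_val = 0x80050033:
 *
 *	mk_cr_64(curr_cr, new_val)
 *		== (0xffff000000050033 & 0xffffffff00000000) | 0x80050033
 *		== 0xffff000080050033
 */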
4594
4595static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4596{
4597 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4598 unsigned long value;
4599
4600 switch (cr) {
4601 case 0:
4602 value = kvm_read_cr0(vcpu);
4603 break;
4604 case 2:
4605 value = vcpu->arch.cr2;
4606 break;
4607 case 3:
4608 value = kvm_read_cr3(vcpu);
4609 break;
4610 case 4:
4611 value = kvm_read_cr4(vcpu);
4612 break;
4613 case 8:
4614 value = kvm_get_cr8(vcpu);
4615 break;
4616 default:
4617 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4618 return 0;
4619 }
4620
4621 return value;
4622}
4623
4624static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4625{
4626 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4627 int res = 0;
4628
4629 switch (cr) {
4630 case 0:
4631 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4632 break;
4633 case 2:
4634 vcpu->arch.cr2 = val;
4635 break;
4636 case 3:
4637 res = kvm_set_cr3(vcpu, val);
4638 break;
4639 case 4:
4640 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4641 break;
4642 case 8:
4643 res = kvm_set_cr8(vcpu, val);
4644 break;
4645 default:
4646 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4647 res = -1;
4648 }
4649
4650 return res;
4651}
4652
4653static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4654{
4655 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4656}
4657
4658static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4659{
4660 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4661}
4662
4663static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4664{
4665 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4666}
4667
4668static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4669{
4670 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4671}
4672
4673static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4674{
4675 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4676}
4677
4678static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4679{
4680 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4681}
4682
4683static unsigned long emulator_get_cached_segment_base(
4684 struct x86_emulate_ctxt *ctxt, int seg)
4685{
4686 return get_segment_base(emul_to_vcpu(ctxt), seg);
4687}
4688
4689static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4690 struct desc_struct *desc, u32 *base3,
4691 int seg)
4692{
4693 struct kvm_segment var;
4694
4695 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4696 *selector = var.selector;
4697
4698 if (var.unusable) {
4699 memset(desc, 0, sizeof(*desc));
4700 return false;
4701 }
4702
4703 if (var.g)
4704 var.limit >>= 12;
4705 set_desc_limit(desc, var.limit);
4706 set_desc_base(desc, (unsigned long)var.base);
4707#ifdef CONFIG_X86_64
4708 if (base3)
4709 *base3 = var.base >> 32;
4710#endif
4711 desc->type = var.type;
4712 desc->s = var.s;
4713 desc->dpl = var.dpl;
4714 desc->p = var.present;
4715 desc->avl = var.avl;
4716 desc->l = var.l;
4717 desc->d = var.db;
4718 desc->g = var.g;
4719
4720 return true;
4721}
4722
4723static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4724 struct desc_struct *desc, u32 base3,
4725 int seg)
4726{
4727 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4728 struct kvm_segment var;
4729
4730 var.selector = selector;
4731 var.base = get_desc_base(desc);
4732#ifdef CONFIG_X86_64
4733 var.base |= ((u64)base3) << 32;
4734#endif
4735 var.limit = get_desc_limit(desc);
4736 if (desc->g)
4737 var.limit = (var.limit << 12) | 0xfff;
4738 var.type = desc->type;
4740 var.dpl = desc->dpl;
4741 var.db = desc->d;
4742 var.s = desc->s;
4743 var.l = desc->l;
4744 var.g = desc->g;
4745 var.avl = desc->avl;
4746 var.present = desc->p;
4747 var.unusable = !var.present;
4748 var.padding = 0;
4749
4750 kvm_set_segment(vcpu, &var, seg);
4751 return;
4752}
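/*
 * Illustrative note: the get/set helpers above undo each other's handling
 * of the granularity bit.  With g = 1 the descriptor stores the limit in
 * 4KiB units, so for a flat 4GiB segment:
 *
 *	get:  var.limit = 0xffffffff >> 12          = 0x000fffff
 *	set:  var.limit = (0xfffff << 12) | 0xfff   = 0xffffffff
 *
 * i.e. the byte-granular limit still covers the segment's last byte
 * after a round trip.
 */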
4753
4754static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4755 u32 msr_index, u64 *pdata)
4756{
4757 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4758}
4759
4760static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4761 u32 msr_index, u64 data)
4762{
4763 struct msr_data msr;
4764
4765 msr.data = data;
4766 msr.index = msr_index;
4767 msr.host_initiated = false;
4768 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4769}
4770
4771static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4772 u32 pmc, u64 *pdata)
4773{
4774 return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
4775}
4776
4777static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4778{
4779 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4780}
4781
4782static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4783{
4784 preempt_disable();
4785 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
4786 /*
4787  * CR0.TS may reference the host fpu state, not the guest fpu state,
4788  * so clear TS before the emulator touches FPU state.
4789  */
4790 clts();
4791}
4792
4793static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4794{
4795 preempt_enable();
4796}
4797
4798static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4799 struct x86_instruction_info *info,
4800 enum x86_intercept_stage stage)
4801{
4802 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4803}
4804
4805static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4806 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4807{
4808 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4809}
4810
4811static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4812{
4813 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4814}
4815
4816static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4817{
4818 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4819}
4820
4821static const struct x86_emulate_ops emulate_ops = {
4822 .read_gpr = emulator_read_gpr,
4823 .write_gpr = emulator_write_gpr,
4824 .read_std = kvm_read_guest_virt_system,
4825 .write_std = kvm_write_guest_virt_system,
4826 .fetch = kvm_fetch_guest_virt,
4827 .read_emulated = emulator_read_emulated,
4828 .write_emulated = emulator_write_emulated,
4829 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4830 .invlpg = emulator_invlpg,
4831 .pio_in_emulated = emulator_pio_in_emulated,
4832 .pio_out_emulated = emulator_pio_out_emulated,
4833 .get_segment = emulator_get_segment,
4834 .set_segment = emulator_set_segment,
4835 .get_cached_segment_base = emulator_get_cached_segment_base,
4836 .get_gdt = emulator_get_gdt,
4837 .get_idt = emulator_get_idt,
4838 .set_gdt = emulator_set_gdt,
4839 .set_idt = emulator_set_idt,
4840 .get_cr = emulator_get_cr,
4841 .set_cr = emulator_set_cr,
4842 .set_rflags = emulator_set_rflags,
4843 .cpl = emulator_get_cpl,
4844 .get_dr = emulator_get_dr,
4845 .set_dr = emulator_set_dr,
4846 .set_msr = emulator_set_msr,
4847 .get_msr = emulator_get_msr,
4848 .read_pmc = emulator_read_pmc,
4849 .halt = emulator_halt,
4850 .wbinvd = emulator_wbinvd,
4851 .fix_hypercall = emulator_fix_hypercall,
4852 .get_fpu = emulator_get_fpu,
4853 .put_fpu = emulator_put_fpu,
4854 .intercept = emulator_intercept,
4855 .get_cpuid = emulator_get_cpuid,
4856};
4857
4858static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4859{
4860 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4861
4862 /*
4863  * An sti; sti sequence only disables interrupts for the first
4864  * instruction, so it is better to ignore the request if some
4865  * other event has already occurred.
4866  */
4868 if (!(int_shadow & mask))
4869 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4870}
4871
4872static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4873{
4874 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4875 if (ctxt->exception.vector == PF_VECTOR)
4876 kvm_propagate_fault(vcpu, &ctxt->exception);
4877 else if (ctxt->exception.error_code_valid)
4878 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4879 ctxt->exception.error_code);
4880 else
4881 kvm_queue_exception(vcpu, ctxt->exception.vector);
4882}
4883
4884static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4885{
4886 memset(&ctxt->opcode_len, 0,
4887 (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
4888
4889 ctxt->fetch.start = 0;
4890 ctxt->fetch.end = 0;
4891 ctxt->io_read.pos = 0;
4892 ctxt->io_read.end = 0;
4893 ctxt->mem_read.pos = 0;
4894 ctxt->mem_read.end = 0;
4895}
4896
4897static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4898{
4899 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4900 int cs_db, cs_l;
4901
4902 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4903
4904 ctxt->eflags = kvm_get_rflags(vcpu);
4905 ctxt->eip = kvm_rip_read(vcpu);
4906 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4907 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4908 cs_l ? X86EMUL_MODE_PROT64 :
4909 cs_db ? X86EMUL_MODE_PROT32 :
4910 X86EMUL_MODE_PROT16;
4911 ctxt->guest_mode = is_guest_mode(vcpu);
4912
4913 init_decode_cache(ctxt);
4914 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4915}
4916
4917int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4918{
4919 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4920 int ret;
4921
4922 init_emulate_ctxt(vcpu);
4923
4924 ctxt->op_bytes = 2;
4925 ctxt->ad_bytes = 2;
4926 ctxt->_eip = ctxt->eip + inc_eip;
4927 ret = emulate_int_real(ctxt, irq);
4928
4929 if (ret != X86EMUL_CONTINUE)
4930 return EMULATE_FAIL;
4931
4932 ctxt->eip = ctxt->_eip;
4933 kvm_rip_write(vcpu, ctxt->eip);
4934 kvm_set_rflags(vcpu, ctxt->eflags);
4935
4936 if (irq == NMI_VECTOR)
4937 vcpu->arch.nmi_pending = 0;
4938 else
4939 vcpu->arch.interrupt.pending = false;
4940
4941 return EMULATE_DONE;
4942}
4943EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4944
4945static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4946{
4947 int r = EMULATE_DONE;
4948
4949 ++vcpu->stat.insn_emulation_fail;
4950 trace_kvm_emulate_insn_failed(vcpu);
4951 if (!is_guest_mode(vcpu)) {
4952 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4953 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4954 vcpu->run->internal.ndata = 0;
4955 r = EMULATE_FAIL;
4956 }
4957 kvm_queue_exception(vcpu, UD_VECTOR);
4958
4959 return r;
4960}
4961
4962static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
4963 bool write_fault_to_shadow_pgtable,
4964 int emulation_type)
4965{
4966 gpa_t gpa = cr2;
4967 pfn_t pfn;
4968
4969 if (emulation_type & EMULTYPE_NO_REEXECUTE)
4970 return false;
4971
4972 if (!vcpu->arch.mmu.direct_map) {
4973 /*
4974  * Write permission should be allowed since only
4975  * write access needs to be emulated.
4976  */
4977 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4978
4979 /*
4980  * If the mapping is invalid in the guest, let the cpu retry
4981  * it to generate the fault.
4982  */
4983 if (gpa == UNMAPPED_GVA)
4984 return true;
4985 }
4986
4987 /*
4988  * Do not retry the unhandleable instruction if it faults on
4989  * read-only host memory; otherwise it will go into an infinite loop:
4990  * retry instruction -> write #PF -> emulation fail -> retry
4991  * again -> ...
4992  */
4993 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4994
4995 /*
4996  * If the instruction failed on the error pfn, it cannot be fixed;
4997  * report the error to userspace.
4998  */
4999 if (is_error_noslot_pfn(pfn))
5000 return false;
5001
5002 kvm_release_pfn_clean(pfn);
5003
5004 /* The instructions are well-emulated on direct mmu. */
5005 if (vcpu->arch.mmu.direct_map) {
5006 unsigned int indirect_shadow_pages;
5007
5008 spin_lock(&vcpu->kvm->mmu_lock);
5009 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
5010 spin_unlock(&vcpu->kvm->mmu_lock);
5011
5012 if (indirect_shadow_pages)
5013 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5014
5015 return true;
5016 }
5017
5018 /*
5019  * If emulation was due to access to a shadowed page table, try to
5020  * unshadow the page and re-enter the guest to let the CPU execute
5021  * the instruction again.
5022  */
5023 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5024
5025 /*
5026  * If the access faults on its own page table, it cannot be fixed
5027  * by unprotecting the shadow page; it has to be reported to
5028  * userspace instead.
5029  */
5030 return !write_fault_to_shadow_pgtable;
5031}
5032
5033static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
5034 unsigned long cr2, int emulation_type)
5035{
5036 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5037 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
5038
5039 last_retry_eip = vcpu->arch.last_retry_eip;
5040 last_retry_addr = vcpu->arch.last_retry_addr;
5041
5042 /*
5043  * If the emulation is caused by #PF on a non-page-table-writing
5044  * instruction, the VM-EXIT was caused by shadow page protection:
5045  * we can zap the shadow page and retry the instruction directly.
5046  *
5047  * Note: if the guest uses a non-page-table-modifying instruction
5048  * on the PDE that points to the instruction, then we will unmap
5049  * the instruction and go into an infinite loop.  So, we cache the
5050  * last retried eip and the last faulted address; if we meet the
5051  * same eip and address again, we break out of the potential
5052  * infinite loop.
5053  */
5055 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
5056
5057 if (!(emulation_type & EMULTYPE_RETRY))
5058 return false;
5059
5060 if (x86_page_table_writing_insn(ctxt))
5061 return false;
5062
5063 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
5064 return false;
5065
5066 vcpu->arch.last_retry_eip = ctxt->eip;
5067 vcpu->arch.last_retry_addr = cr2;
5068
5069 if (!vcpu->arch.mmu.direct_map)
5070 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5071
5072 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5073
5074 return true;
5075}
5076
5077static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
5078static int complete_emulated_pio(struct kvm_vcpu *vcpu);
5079
5080static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
5081 unsigned long *db)
5082{
5083 u32 dr6 = 0;
5084 int i;
5085 u32 enable, rwlen;
5086
5087 enable = dr7;
5088 rwlen = dr7 >> 16;
5089 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
5090 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
5091 dr6 |= (1 << i);
5092 return dr6;
5093}
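/*
 * Worked example (illustrative): DR7 keeps a pair of enable bits per
 * breakpoint in bits 0-7 and a 4-bit R/W+LEN field per breakpoint in
 * bits 16-31.  With DR0 armed as a 1-byte execute breakpoint at "addr":
 *
 *	dr7 = 0x00000001	(L0 set, R/W0 = 0 -> instruction fetch)
 *	kvm_vcpu_check_hw_bp(addr, 0, dr7, db) == 0x1	(B0 set)
 *
 * because slot 0 is enabled, its rwlen nibble matches type 0, and
 * db[0] == addr.
 */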
5094
5095static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
5096{
5097 struct kvm_run *kvm_run = vcpu->run;
5098
5099 /*
5100  * rflags is the old, "raw" value of the flags.  The new value has
5101  * not been saved yet.
5102  *
5103  * This is correct even for TF set by the guest, because "the
5104  * processor will not generate this exception after the instruction
5105  * that sets the TF flag".
5106  */
5107 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5108
5109 if (unlikely(rflags & X86_EFLAGS_TF)) {
5110 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5111 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
5112 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5113 kvm_run->debug.arch.exception = DB_VECTOR;
5114 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5115 *r = EMULATE_USER_EXIT;
5116 } else {
5117 vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
5118 /*
5119  * "Certain debug exceptions may clear bits 0-3.  The
5120  * remaining contents of the DR6 register are never
5121  * cleared by the processor."
5122  */
5123 vcpu->arch.dr6 &= ~15;
5124 vcpu->arch.dr6 |= DR6_BS;
5125 kvm_queue_exception(vcpu, DB_VECTOR);
5126 }
5127 }
5128}
5129
5130static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5131{
5132 struct kvm_run *kvm_run = vcpu->run;
5133 unsigned long eip = vcpu->arch.emulate_ctxt.eip;
5134 u32 dr6 = 0;
5135
5136 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
5137 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
5138 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5139 vcpu->arch.guest_debug_dr7,
5140 vcpu->arch.eff_db);
5141
5142 if (dr6 != 0) {
5143 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
5144 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
5145 get_segment_base(vcpu, VCPU_SREG_CS);
5146
5147 kvm_run->debug.arch.exception = DB_VECTOR;
5148 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5149 *r = EMULATE_USER_EXIT;
5150 return true;
5151 }
5152 }
5153
5154 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) {
5155 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5156 vcpu->arch.dr7,
5157 vcpu->arch.db);
5158
5159 if (dr6 != 0) {
5160 vcpu->arch.dr6 &= ~15;
5161 vcpu->arch.dr6 |= dr6;
5162 kvm_queue_exception(vcpu, DB_VECTOR);
5163 *r = EMULATE_DONE;
5164 return true;
5165 }
5166 }
5167
5168 return false;
5169}
5170
5171int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5172 unsigned long cr2,
5173 int emulation_type,
5174 void *insn,
5175 int insn_len)
5176{
5177 int r;
5178 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5179 bool writeback = true;
5180 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
5181
5182 /*
5183  * Clear write_fault_to_shadow_pgtable here to ensure it is
5184  * never reused.
5185  */
5186 vcpu->arch.write_fault_to_shadow_pgtable = false;
5187 kvm_clear_exception_queue(vcpu);
5188
5189 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
5190 init_emulate_ctxt(vcpu);
5191
5192 /*
5193  * We will reenter on the same instruction since
5194  * we do not set complete_userspace_io.  This does not
5195  * handle watchpoints yet; those would be handled in
5196  * the emulate_ops.
5197  */
5198 if (kvm_vcpu_check_breakpoint(vcpu, &r))
5199 return r;
5200
5201 ctxt->interruptibility = 0;
5202 ctxt->have_exception = false;
5203 ctxt->perm_ok = false;
5204
5205 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5206
5207 r = x86_decode_insn(ctxt, insn, insn_len);
5208
5209 trace_kvm_emulate_insn_start(vcpu);
5210 ++vcpu->stat.insn_emulation;
5211 if (r != EMULATION_OK) {
5212 if (emulation_type & EMULTYPE_TRAP_UD)
5213 return EMULATE_FAIL;
5214 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5215 emulation_type))
5216 return EMULATE_DONE;
5217 if (emulation_type & EMULTYPE_SKIP)
5218 return EMULATE_FAIL;
5219 return handle_emulation_failure(vcpu);
5220 }
5221 }
5222
5223 if (emulation_type & EMULTYPE_SKIP) {
5224 kvm_rip_write(vcpu, ctxt->_eip);
5225 return EMULATE_DONE;
5226 }
5227
5228 if (retry_instruction(ctxt, cr2, emulation_type))
5229 return EMULATE_DONE;
5230
5231 /* This is needed for the vmware backdoor interface to work since it
5232    changes register values during an IO operation. */
5233 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
5234 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5235 emulator_invalidate_register_cache(ctxt);
5236 }
5237
5238restart:
5239 r = x86_emulate_insn(ctxt);
5240
5241 if (r == EMULATION_INTERCEPTED)
5242 return EMULATE_DONE;
5243
5244 if (r == EMULATION_FAILED) {
5245 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5246 emulation_type))
5247 return EMULATE_DONE;
5248
5249 return handle_emulation_failure(vcpu);
5250 }
5251
5252 if (ctxt->have_exception) {
5253 inject_emulated_exception(vcpu);
5254 r = EMULATE_DONE;
5255 } else if (vcpu->arch.pio.count) {
5256 if (!vcpu->arch.pio.in) {
5257 /* FIXME: return into emulator if single-stepping.  */
5258 vcpu->arch.pio.count = 0;
5259 } else {
5260 writeback = false;
5261 vcpu->arch.complete_userspace_io = complete_emulated_pio;
5262 }
5263 r = EMULATE_USER_EXIT;
5264 } else if (vcpu->mmio_needed) {
5265 if (!vcpu->mmio_is_write)
5266 writeback = false;
5267 r = EMULATE_USER_EXIT;
5268 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5269 } else if (r == EMULATION_RESTART)
5270 goto restart;
5271 else
5272 r = EMULATE_DONE;
5273
5274 if (writeback) {
5275 toggle_interruptibility(vcpu, ctxt->interruptibility);
5276 kvm_make_request(KVM_REQ_EVENT, vcpu);
5277 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5278 kvm_rip_write(vcpu, ctxt->eip);
5279 if (r == EMULATE_DONE)
5280 kvm_vcpu_check_singlestep(vcpu, &r);
5281 kvm_set_rflags(vcpu, ctxt->eflags);
5282 } else
5283 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5284
5285 return r;
5286}
5287EXPORT_SYMBOL_GPL(x86_emulate_instruction);
5288
5289int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
5290{
5291 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
5292 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
5293 size, port, &val, 1);
5294
5295 vcpu->arch.pio.count = 0;
5296 return ret;
5297}
5298EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
5299
5300static void tsc_bad(void *info)
5301{
5302 __this_cpu_write(cpu_tsc_khz, 0);
5303}
5304
5305static void tsc_khz_changed(void *data)
5306{
5307 struct cpufreq_freqs *freq = data;
5308 unsigned long khz = 0;
5309
5310 if (data)
5311 khz = freq->new;
5312 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5313 khz = cpufreq_quick_get(raw_smp_processor_id());
5314 if (!khz)
5315 khz = tsc_khz;
5316 __this_cpu_write(cpu_tsc_khz, khz);
5317}
5318
5319static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
5320 void *data)
5321{
5322 struct cpufreq_freqs *freq = data;
5323 struct kvm *kvm;
5324 struct kvm_vcpu *vcpu;
5325 int i, send_ipi = 0;
5326
5327 /*
5328  * We allow guests to temporarily run on slowing clocks,
5329  * provided we notify them after, or to run on accelerating
5330  * clocks, provided we notify them before.  Thus time never
5331  * goes backwards.
5332  *
5333  * However, we have a problem.  We can't atomically update
5334  * the frequency of a given CPU from this function; it is
5335  * merely a notifier, which can be called from any CPU.
5336  * Changing the TSC frequency at arbitrary points in time
5337  * requires a recomputation of local variables related to
5338  * the TSC for each VCPU.  We must flag these local variables
5339  * to be updated and be sure the update takes place with the
5340  * new frequency before any guests proceed.
5341  *
5342  * Unfortunately, the combination of hotplug CPU and frequency
5343  * change creates an intractable locking scenario; the order
5344  * of when these callouts happen is undefined with respect to
5345  * CPU hotplug, and they can race with each other.  As such,
5346  * merely setting per_cpu(cpu_tsc_khz) = X during a hotplug is
5347  * undefined; you can actually have a CPU frequency change take
5348  * place in between the computation of X and the setting of the
5349  * variable.  To protect against this problem, all updates of
5350  * the per_cpu tsc_khz variable are done in an interrupt
5351  * protected IPI, and all callers wishing to update the value
5352  * must wait for a synchronous IPI to complete (which is trivial
5353  * if the caller is on the CPU already).  This establishes the
5354  * necessary total order on variable updates.
5355  *
5356  * Note that because a guest time update may take place
5357  * anytime after the setting of the VCPU's request bit, the
5358  * correct TSC value must be set before the request.  The
5359  * synchronous IPI above serves as that ordering point for
5360  * the CPU whose frequency is changing.
5361  */
5366 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5367 return 0;
5368 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5369 return 0;
5370
5371 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5372
5373 spin_lock(&kvm_lock);
5374 list_for_each_entry(kvm, &vm_list, vm_list) {
5375 kvm_for_each_vcpu(i, vcpu, kvm) {
5376 if (vcpu->cpu != freq->cpu)
5377 continue;
5378 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5379 if (vcpu->cpu != smp_processor_id())
5380 send_ipi = 1;
5381 }
5382 }
5383 spin_unlock(&kvm_lock);
5384
5385 if (freq->old < freq->new && send_ipi) {
5386 /*
5387  * We upscale the frequency.  We must make sure the guest does
5388  * not see old kvmclock values while running with the new
5389  * frequency; otherwise we risk the guest seeing time go
5390  * backwards.
5391  *
5392  * In case we update the frequency for another cpu (which might
5393  * be in guest context) send an interrupt to kick the cpu out
5394  * of guest context.  Next time guest context is entered,
5395  * kvmclock will be updated, so the guest will not see stale
5396  * values.
5397  */
5398 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5399 }
5400 return 0;
5401}
5402
5403static struct notifier_block kvmclock_cpufreq_notifier_block = {
5404 .notifier_call = kvmclock_cpufreq_notifier
5405};
5406
5407static int kvmclock_cpu_notifier(struct notifier_block *nfb,
5408 unsigned long action, void *hcpu)
5409{
5410 unsigned int cpu = (unsigned long)hcpu;
5411
5412 switch (action) {
5413 case CPU_ONLINE:
5414 case CPU_DOWN_FAILED:
5415 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5416 break;
5417 case CPU_DOWN_PREPARE:
5418 smp_call_function_single(cpu, tsc_bad, NULL, 1);
5419 break;
5420 }
5421 return NOTIFY_OK;
5422}
5423
5424static struct notifier_block kvmclock_cpu_notifier_block = {
5425 .notifier_call = kvmclock_cpu_notifier,
5426 .priority = -INT_MAX
5427};
5428
5429static void kvm_timer_init(void)
5430{
5431 int cpu;
5432
5433 max_tsc_khz = tsc_khz;
5434
5435 cpu_notifier_register_begin();
5436 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5437#ifdef CONFIG_CPU_FREQ
5438 struct cpufreq_policy policy;
5439 memset(&policy, 0, sizeof(policy));
5440 cpu = get_cpu();
5441 cpufreq_get_policy(&policy, cpu);
5442 if (policy.cpuinfo.max_freq)
5443 max_tsc_khz = policy.cpuinfo.max_freq;
5444 put_cpu();
5445#endif
5446 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5447 CPUFREQ_TRANSITION_NOTIFIER);
5448 }
5449 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5450 for_each_online_cpu(cpu)
5451 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5452
5453 __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5454 cpu_notifier_register_done();
5455
5456}
5457
5458static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5459
5460int kvm_is_in_guest(void)
5461{
5462 return __this_cpu_read(current_vcpu) != NULL;
5463}
5464
5465static int kvm_is_user_mode(void)
5466{
5467 int user_mode = 3;
5468
5469 if (__this_cpu_read(current_vcpu))
5470 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
5471
5472 return user_mode != 0;
5473}
5474
5475static unsigned long kvm_get_guest_ip(void)
5476{
5477 unsigned long ip = 0;
5478
5479 if (__this_cpu_read(current_vcpu))
5480 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
5481
5482 return ip;
5483}
5484
5485static struct perf_guest_info_callbacks kvm_guest_cbs = {
5486 .is_in_guest = kvm_is_in_guest,
5487 .is_user_mode = kvm_is_user_mode,
5488 .get_guest_ip = kvm_get_guest_ip,
5489};
5490
5491void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5492{
5493 __this_cpu_write(current_vcpu, vcpu);
5494}
5495EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5496
5497void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5498{
5499 __this_cpu_write(current_vcpu, NULL);
5500}
5501EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5502
5503static void kvm_set_mmio_spte_mask(void)
5504{
5505 u64 mask;
5506 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5507
5508 /*
5509  * Set the reserved bits and the present bit of a paging-structure
5510  * entry to generate a page fault with PFERR.RSVD = 1.
5511  */
5512 /* Mask the reserved physical address bits. */
5513 mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr;
5514
5515 /* Bit 62 is always reserved for 32bit host. */
5516 mask |= 0x3ull << 62;
5517
5518 /* Set the present bit. */
5519 mask |= 1ull;
5520
5521#ifdef CONFIG_X86_64
5522 /*
5523  * If the reserved bit is not supported, clear the present bit
5524  * to disable the mmio page fault.
5525  */
5526 if (maxphyaddr == 52)
5527 mask &= ~1ull;
5528#endif
5529
5530 kvm_mmu_set_mmio_spte_mask(mask);
5531}
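/*
 * Worked example (illustrative): on a host with maxphyaddr == 40 the
 * mask built above is
 *
 *	((1ull << 12) - 1) << 40	reserved PA bits 51:40
 *	| 0x3ull << 62			bits 63:62
 *	| 1ull				present bit
 *
 * so an spte generated for MMIO has reserved bits set and faults with
 * PFERR_RSVD, letting the MMU recognize MMIO accesses without a
 * memslot walk.
 */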
5532
5533#ifdef CONFIG_X86_64
5534static void pvclock_gtod_update_fn(struct work_struct *work)
5535{
5536 struct kvm *kvm;
5537
5538 struct kvm_vcpu *vcpu;
5539 int i;
5540
5541 spin_lock(&kvm_lock);
5542 list_for_each_entry(kvm, &vm_list, vm_list)
5543 kvm_for_each_vcpu(i, vcpu, kvm)
5544 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5545 atomic_set(&kvm_guest_has_master_clock, 0);
5546 spin_unlock(&kvm_lock);
5547}
5548
5549static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5550
5551/*
5552 * Notification about pvclock gtod data update.
5553 */
5554static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5555 void *priv)
5556{
5557 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5558 struct timekeeper *tk = priv;
5559
5560 update_pvclock_gtod(tk);
5561
5562 /* disable master clock if host does not trust, or does not
5563  * use, TSC clocksource
5564  */
5565 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5566 atomic_read(&kvm_guest_has_master_clock) != 0)
5567 queue_work(system_long_wq, &pvclock_gtod_work);
5568
5569 return 0;
5570}
5571
5572static struct notifier_block pvclock_gtod_notifier = {
5573 .notifier_call = pvclock_gtod_notify,
5574};
5575#endif
5576
5577int kvm_arch_init(void *opaque)
5578{
5579 int r;
5580 struct kvm_x86_ops *ops = opaque;
5581
5582 if (kvm_x86_ops) {
5583 printk(KERN_ERR "kvm: already loaded the other module\n");
5584 r = -EEXIST;
5585 goto out;
5586 }
5587
5588 if (!ops->cpu_has_kvm_support()) {
5589 printk(KERN_ERR "kvm: no hardware support\n");
5590 r = -EOPNOTSUPP;
5591 goto out;
5592 }
5593 if (ops->disabled_by_bios()) {
5594 printk(KERN_ERR "kvm: disabled by bios\n");
5595 r = -EOPNOTSUPP;
5596 goto out;
5597 }
5598
5599 r = -ENOMEM;
5600 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5601 if (!shared_msrs) {
5602 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5603 goto out;
5604 }
5605
5606 r = kvm_mmu_module_init();
5607 if (r)
5608 goto out_free_percpu;
5609
5610 kvm_set_mmio_spte_mask();
5611
5612 kvm_x86_ops = ops;
5613 kvm_init_msr_list();
5614
5615 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5616 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5617
5618 kvm_timer_init();
5619
5620 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5621
5622 if (cpu_has_xsave)
5623 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5624
5625 kvm_lapic_init();
5626#ifdef CONFIG_X86_64
5627 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5628#endif
5629
5630 return 0;
5631
5632out_free_percpu:
5633 free_percpu(shared_msrs);
5634out:
5635 return r;
5636}
5637
5638void kvm_arch_exit(void)
5639{
5640 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5641
5642 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5643 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5644 CPUFREQ_TRANSITION_NOTIFIER);
5645 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5646#ifdef CONFIG_X86_64
5647 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5648#endif
5649 kvm_x86_ops = NULL;
5650 kvm_mmu_module_exit();
5651 free_percpu(shared_msrs);
5652}
5653
5654int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5655{
5656 ++vcpu->stat.halt_exits;
5657 if (irqchip_in_kernel(vcpu->kvm)) {
5658 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5659 return 1;
5660 } else {
5661 vcpu->run->exit_reason = KVM_EXIT_HLT;
5662 return 0;
5663 }
5664}
5665EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5666
5667int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5668{
5669 u64 param, ingpa, outgpa, ret;
5670 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5671 bool fast, longmode;
5672 int cs_db, cs_l;
5673
5674 /*
5675  * A hypercall generates #UD from non-zero CPL or real mode,
5676  * per the Hyper-V spec.
5677  */
5678 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5679 kvm_queue_exception(vcpu, UD_VECTOR);
5680 return 0;
5681 }
5682
5683 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5684 longmode = is_long_mode(vcpu) && cs_l == 1;
5685
5686 if (!longmode) {
5687 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5688 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5689 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5690 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5691 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5692 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5693 }
5694#ifdef CONFIG_X86_64
5695 else {
5696 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5697 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5698 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5699 }
5700#endif
5701
5702 code = param & 0xffff;
5703 fast = (param >> 16) & 0x1;
5704 rep_cnt = (param >> 32) & 0xfff;
5705 rep_idx = (param >> 48) & 0xfff;
5706
5707 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5708
5709 switch (code) {
5710 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5711 kvm_vcpu_on_spin(vcpu);
5712 break;
5713 default:
5714 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5715 break;
5716 }
5717
5718 ret = res | (((u64)rep_done & 0xfff) << 32);
5719 if (longmode) {
5720 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5721 } else {
5722 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5723 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5724 }
5725
5726 return 1;
5727}
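/*
 * Worked example (illustrative): the Hyper-V hypercall input value packs
 * its fields as decoded above.  For param = 0x0002000100010008:
 *
 *	code    = param & 0xffff        = 0x0008  (notify long spin wait)
 *	fast    = (param >> 16) & 0x1   = 1
 *	rep_cnt = (param >> 32) & 0xfff = 0x001
 *	rep_idx = (param >> 48) & 0xfff = 0x002
 */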
5728
5729/*
5730 * kvm_pv_kick_cpu_op: Kick a vcpu.
5731 *
5732 * @apicid - apicid of the vcpu to be kicked.
5733 */
5734static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
5735{
5736 struct kvm_lapic_irq lapic_irq;
5737
5738 lapic_irq.shorthand = 0;
5739 lapic_irq.dest_mode = 0;
5740 lapic_irq.dest_id = apicid;
5741
5742 lapic_irq.delivery_mode = APIC_DM_REMRD;
5743 kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
5744}
5745
5746int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5747{
5748 unsigned long nr, a0, a1, a2, a3, ret;
5749 int r = 1;
5750
5751 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5752 return kvm_hv_hypercall(vcpu);
5753
5754 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5755 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5756 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5757 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5758 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5759
5760 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5761
5762 if (!is_long_mode(vcpu)) {
5763 nr &= 0xFFFFFFFF;
5764 a0 &= 0xFFFFFFFF;
5765 a1 &= 0xFFFFFFFF;
5766 a2 &= 0xFFFFFFFF;
5767 a3 &= 0xFFFFFFFF;
5768 }
5769
5770 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5771 ret = -KVM_EPERM;
5772 goto out;
5773 }
5774
5775 switch (nr) {
5776 case KVM_HC_VAPIC_POLL_IRQ:
5777 ret = 0;
5778 break;
5779 case KVM_HC_KICK_CPU:
5780 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
5781 ret = 0;
5782 break;
5783 default:
5784 ret = -KVM_ENOSYS;
5785 break;
5786 }
5787out:
5788 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5789 ++vcpu->stat.hypercalls;
5790 return r;
5791}
5792EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5793
5794static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5795{
5796 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5797 char instruction[3];
5798 unsigned long rip = kvm_rip_read(vcpu);
5799
5800 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5801
5802 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5803}
5804
5805/*
5806 * Check if userspace requested an interrupt window, and that the
5807 * interrupt window is open.
5808 *
5809 * No need to exit to userspace if we already have an interrupt queued.
5810 */
5811static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5812{
5813 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5814 vcpu->run->request_interrupt_window &&
5815 kvm_arch_interrupt_allowed(vcpu));
5816}
5817
5818static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5819{
5820 struct kvm_run *kvm_run = vcpu->run;
5821
5822 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5823 kvm_run->cr8 = kvm_get_cr8(vcpu);
5824 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5825 if (irqchip_in_kernel(vcpu->kvm))
5826 kvm_run->ready_for_interrupt_injection = 1;
5827 else
5828 kvm_run->ready_for_interrupt_injection =
5829 kvm_arch_interrupt_allowed(vcpu) &&
5830 !kvm_cpu_has_interrupt(vcpu) &&
5831 !kvm_event_needs_reinjection(vcpu);
5832}
5833
5834static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5835{
5836 int max_irr, tpr;
5837
5838 if (!kvm_x86_ops->update_cr8_intercept)
5839 return;
5840
5841 if (!vcpu->arch.apic)
5842 return;
5843
5844 if (!vcpu->arch.apic->vapic_addr)
5845 max_irr = kvm_lapic_find_highest_irr(vcpu);
5846 else
5847 max_irr = -1;
5848
5849 if (max_irr != -1)
5850 max_irr >>= 4;
5851
5852 tpr = kvm_lapic_get_cr8(vcpu);
5853
5854 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5855}
5856
5857static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5858{
5859 int r;
5860
5861 /* try to reinject previous events if any */
5862 if (vcpu->arch.exception.pending) {
5863 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5864 vcpu->arch.exception.has_error_code,
5865 vcpu->arch.exception.error_code);
5866 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5867 vcpu->arch.exception.has_error_code,
5868 vcpu->arch.exception.error_code,
5869 vcpu->arch.exception.reinject);
5870 return 0;
5871 }
5872
5873 if (vcpu->arch.nmi_injected) {
5874 kvm_x86_ops->set_nmi(vcpu);
5875 return 0;
5876 }
5877
5878 if (vcpu->arch.interrupt.pending) {
5879 kvm_x86_ops->set_irq(vcpu);
5880 return 0;
5881 }
5882
5883 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
5884 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
5885 if (r != 0)
5886 return r;
5887 }
5888
5889 /* try to inject new event if pending */
5890 if (vcpu->arch.nmi_pending) {
5891 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5892 --vcpu->arch.nmi_pending;
5893 vcpu->arch.nmi_injected = true;
5894 kvm_x86_ops->set_nmi(vcpu);
5895 }
5896 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
5897 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5898 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5899 false);
5900 kvm_x86_ops->set_irq(vcpu);
5901 }
5902 }
5903 return 0;
5904}
5905
5906static void process_nmi(struct kvm_vcpu *vcpu)
5907{
5908 unsigned limit = 2;
5909
5910 /*
5911  * x86 is limited to one NMI running, and one NMI pending after it.
5912  * If an NMI is already in progress, limit further NMIs to just one.
5913  * Otherwise, allow two (and we'll inject the first one immediately).
5914  */
5915 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
5916 limit = 1;
5917
5918 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
5919 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
5920 kvm_make_request(KVM_REQ_EVENT, vcpu);
5921}
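/*
 * Worked example (illustrative): if three NMIs were queued while another
 * NMI was still being handled (nmi_injected set), limit is 1 and
 *
 *	nmi_pending = min(0 + 3, 1) = 1
 *
 * so exactly one NMI stays pending behind the one in service; the rest
 * collapse, matching how bare-metal x86 latches at most one NMI.
 */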
5922
5923static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
5924{
5925 u64 eoi_exit_bitmap[4];
5926 u32 tmr[8];
5927
5928 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
5929 return;
5930
5931 memset(eoi_exit_bitmap, 0, 32);
5932 memset(tmr, 0, 32);
5933
5934 kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
5935 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
5936 kvm_apic_update_tmr(vcpu, tmr);
5937}
5938
5939/*
5940 * Returns 1 to let __vcpu_run() continue the guest execution loop without
5941 * exiting to the userspace.  Otherwise, the value will be returned to the
5942 * userspace.
5943 */
5944static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5945{
5946 int r;
5947 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5948 vcpu->run->request_interrupt_window;
5949 bool req_immediate_exit = false;
5950
5951 if (vcpu->requests) {
5952 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5953 kvm_mmu_unload(vcpu);
5954 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5955 __kvm_migrate_timers(vcpu);
5956 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
5957 kvm_gen_update_masterclock(vcpu->kvm);
5958 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
5959 kvm_gen_kvmclock_update(vcpu);
5960 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5961 r = kvm_guest_time_update(vcpu);
5962 if (unlikely(r))
5963 goto out;
5964 }
5965 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5966 kvm_mmu_sync_roots(vcpu);
5967 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5968 kvm_x86_ops->tlb_flush(vcpu);
5969 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5970 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5971 r = 0;
5972 goto out;
5973 }
5974 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5975 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5976 r = 0;
5977 goto out;
5978 }
5979 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5980 vcpu->fpu_active = 0;
5981 kvm_x86_ops->fpu_deactivate(vcpu);
5982 }
5983 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
5984 /* Page is swapped out. Do synthetic halt */
5985 vcpu->arch.apf.halted = true;
5986 r = 1;
5987 goto out;
5988 }
5989 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
5990 record_steal_time(vcpu);
5991 if (kvm_check_request(KVM_REQ_NMI, vcpu))
5992 process_nmi(vcpu);
5993 if (kvm_check_request(KVM_REQ_PMU, vcpu))
5994 kvm_handle_pmu_event(vcpu);
5995 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5996 kvm_deliver_pmi(vcpu);
5997 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
5998 vcpu_scan_ioapic(vcpu);
5999 }
6000
6001 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
6002 kvm_apic_accept_events(vcpu);
6003 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
6004 r = 1;
6005 goto out;
6006 }
6007
6008 if (inject_pending_event(vcpu, req_int_win) != 0)
6009 req_immediate_exit = true;
6010 /* enable NMI/IRQ window open exits if needed */
6011 else if (vcpu->arch.nmi_pending)
6012 kvm_x86_ops->enable_nmi_window(vcpu);
6013 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
6014 kvm_x86_ops->enable_irq_window(vcpu);
6015
6016 if (kvm_lapic_enabled(vcpu)) {
6017 /*
6018  * Update architecture specific hints for APIC
6019  * virtual interrupt delivery.
6020  */
6021 if (kvm_x86_ops->hwapic_irr_update)
6022 kvm_x86_ops->hwapic_irr_update(vcpu,
6023 kvm_lapic_find_highest_irr(vcpu));
6024 update_cr8_intercept(vcpu);
6025 kvm_lapic_sync_to_vapic(vcpu);
6026 }
6027 }
6028
6029 r = kvm_mmu_reload(vcpu);
6030 if (unlikely(r)) {
6031 goto cancel_injection;
6032 }
6033
6034 preempt_disable();
6035
6036 kvm_x86_ops->prepare_guest_switch(vcpu);
6037 if (vcpu->fpu_active)
6038 kvm_load_guest_fpu(vcpu);
6039 kvm_load_guest_xcr0(vcpu);
6040
6041 vcpu->mode = IN_GUEST_MODE;
6042
6043 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6044
6045 /* We should set ->mode before checking ->requests;
6046  * see the comment in make_all_cpus_request().
6047  */
6048 smp_mb__after_srcu_read_unlock();
6049
6050 local_irq_disable();
6051
6052 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
6053 || need_resched() || signal_pending(current)) {
6054 vcpu->mode = OUTSIDE_GUEST_MODE;
6055 smp_wmb();
6056 local_irq_enable();
6057 preempt_enable();
6058 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6059 r = 1;
6060 goto cancel_injection;
6061 }
6062
6063 if (req_immediate_exit)
6064 smp_send_reschedule(vcpu->cpu);
6065
6066 kvm_guest_enter();
6067
6068 if (unlikely(vcpu->arch.switch_db_regs)) {
6069 set_debugreg(0, 7);
6070 set_debugreg(vcpu->arch.eff_db[0], 0);
6071 set_debugreg(vcpu->arch.eff_db[1], 1);
6072 set_debugreg(vcpu->arch.eff_db[2], 2);
6073 set_debugreg(vcpu->arch.eff_db[3], 3);
6074 set_debugreg(vcpu->arch.dr6, 6);
6075 }
6076
6077 trace_kvm_entry(vcpu->vcpu_id);
6078 kvm_x86_ops->run(vcpu);
6079
6080 /*
6081  * Do this here before restoring debug registers on the host.  And
6082  * since we do this before handling the vmexit, a DR access vmexit
6083  * can (a) read the correct value of the debug registers, (b) set
6084  * KVM_DEBUGREG_WONT_EXIT again.
6085  */
6086 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
6087 int i;
6088
6089 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
6090 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
6091 for (i = 0; i < KVM_NR_DB_REGS; i++)
6092 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6093 }
6094
6095 /*
6096  * If the guest has used debug registers, at least dr7
6097  * will be disabled while returning to the host.
6098  * If we don't have active breakpoints in the host, we don't
6099  * care about the messed up debug address registers.  But if
6100  * we have some of them active, restore the old state.
6101  */
6102 if (hw_breakpoint_active())
6103 hw_breakpoint_restore();
6104
6105 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
6106 native_read_tsc());
6107
6108 vcpu->mode = OUTSIDE_GUEST_MODE;
6109 smp_wmb();
6110
6111 /* Interrupt is enabled by handle_external_intr() */
6112 kvm_x86_ops->handle_external_intr(vcpu);
6113
6114 ++vcpu->stat.exits;
6115
6116 /*
6117  * We must have an instruction between local_irq_enable() and
6118  * kvm_guest_exit(), so the timer interrupt isn't delayed by
6119  * the interrupt shadow.  The stat.exits increment will do nicely.
6120  * But we need to prevent reordering, hence this barrier():
6121  */
6122 barrier();
6123
6124 kvm_guest_exit();
6125
6126 preempt_enable();
6127
6128 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6129
6130 /*
6131  * Profile KVM exit RIPs:
6132  */
6133 if (unlikely(prof_on == KVM_PROFILING)) {
6134 unsigned long rip = kvm_rip_read(vcpu);
6135 profile_hit(KVM_PROFILING, (void *)rip);
6136 }
6137
6138 if (unlikely(vcpu->arch.tsc_always_catchup))
6139 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6140
6141 if (vcpu->arch.apic_attention)
6142 kvm_lapic_sync_from_vapic(vcpu);
6143
6144 r = kvm_x86_ops->handle_exit(vcpu);
6145 return r;
6146
6147cancel_injection:
6148 kvm_x86_ops->cancel_injection(vcpu);
6149 if (unlikely(vcpu->arch.apic_attention))
6150 kvm_lapic_sync_from_vapic(vcpu);
6151out:
6152 return r;
6153}
6154
6155
6156static int __vcpu_run(struct kvm_vcpu *vcpu)
6157{
6158 int r;
6159 struct kvm *kvm = vcpu->kvm;
6160
6161 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6162
6163 r = 1;
6164 while (r > 0) {
6165 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6166 !vcpu->arch.apf.halted)
6167 r = vcpu_enter_guest(vcpu);
6168 else {
6169 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6170 kvm_vcpu_block(vcpu);
6171 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6172 if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
6173 kvm_apic_accept_events(vcpu);
6174 switch (vcpu->arch.mp_state) {
6175 case KVM_MP_STATE_HALTED:
6176 vcpu->arch.pv.pv_unhalted = false;
6177 vcpu->arch.mp_state =
6178 KVM_MP_STATE_RUNNABLE; /* fall through */
6179 case KVM_MP_STATE_RUNNABLE:
6180 vcpu->arch.apf.halted = false;
6181 break;
6182 case KVM_MP_STATE_INIT_RECEIVED:
6183 break;
6184 default:
6185 r = -EINTR;
6186 break;
6187 }
6188 }
6189 }
6190
6191 if (r <= 0)
6192 break;
6193
6194 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
6195 if (kvm_cpu_has_pending_timer(vcpu))
6196 kvm_inject_pending_timer_irqs(vcpu);
6197
6198 if (dm_request_for_irq_injection(vcpu)) {
6199 r = -EINTR;
6200 vcpu->run->exit_reason = KVM_EXIT_INTR;
6201 ++vcpu->stat.request_irq_exits;
6202 }
6203
6204 kvm_check_async_pf_completion(vcpu);
6205
6206 if (signal_pending(current)) {
6207 r = -EINTR;
6208 vcpu->run->exit_reason = KVM_EXIT_INTR;
6209 ++vcpu->stat.signal_exits;
6210 }
6211 if (need_resched()) {
6212 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6213 cond_resched();
6214 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6215 }
6216 }
6217
6218 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6219
6220 return r;
6221}
6222
6223static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
6224{
6225 int r;
6226 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6227 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
6228 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6229 if (r != EMULATE_DONE)
6230 return 0;
6231 return 1;
6232}
6233
6234static int complete_emulated_pio(struct kvm_vcpu *vcpu)
6235{
6236 BUG_ON(!vcpu->arch.pio.count);
6237
6238 return complete_emulated_io(vcpu);
6239}
6240
6241/*
6242 * Implements the following, as a state machine:
6243 *
6244 * read:
6245 *   for each fragment
6246 *     for each mmio piece in the fragment
6247 *       write gpa, len
6248 *       exit
6249 *       copy data
6250 *   execute insn
6251 *
6252 * write:
6253 *   for each fragment
6254 *     for each mmio piece in the fragment
6255 *       write gpa, len
6256 *       copy data
6257 *       exit
6258 */
6259static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6260{
6261 struct kvm_run *run = vcpu->run;
6262 struct kvm_mmio_fragment *frag;
6263 unsigned len;
6264
6265 BUG_ON(!vcpu->mmio_needed);
6266
6267 /* Complete previous fragment */
6268 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
6269 len = min(8u, frag->len);
6270 if (!vcpu->mmio_is_write)
6271 memcpy(frag->data, run->mmio.data, len);
6272
6273 if (frag->len <= 8) {
6274 /* Switch to the next fragment. */
6275 frag++;
6276 vcpu->mmio_cur_fragment++;
6277 } else {
6278 /* Go forward to the next mmio piece. */
6279 frag->data += len;
6280 frag->gpa += len;
6281 frag->len -= len;
6282 }
6283
6284 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
6285 vcpu->mmio_needed = 0;
6286
6287 /* FIXME: return into emulator if single-stepping.  */
6288 if (vcpu->mmio_is_write)
6289 return 1;
6290 vcpu->mmio_read_completed = 1;
6291 return complete_emulated_io(vcpu);
6292 }
6293
6294 run->exit_reason = KVM_EXIT_MMIO;
6295 run->mmio.phys_addr = frag->gpa;
6296 if (vcpu->mmio_is_write)
6297 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
6298 run->mmio.len = min(8u, frag->len);
6299 run->mmio.is_write = vcpu->mmio_is_write;
6300 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6301 return 0;
6302}
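/*
 * Worked example (illustrative): a 16-byte emulated read of MMIO arrives
 * as one fragment { gpa = G, len = 16 }.  kvm_run carries at most 8 bytes
 * of mmio data per exit, so userspace sees two exits:
 *
 *	exit 1: mmio.phys_addr = G,     mmio.len = 8
 *	exit 2: mmio.phys_addr = G + 8, mmio.len = 8
 *
 * After the second re-entry the fragment is consumed, mmio_read_completed
 * is set, and the instruction is re-emulated with all data available.
 */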
6303
6304
6305int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
6306{
6307 int r;
6308 sigset_t sigsaved;
6309
6310 if (!tsk_used_math(current) && init_fpu(current))
6311 return -ENOMEM;
6312
6313 if (vcpu->sigset_active)
6314 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
6315
6316 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
6317 kvm_vcpu_block(vcpu);
6318 kvm_apic_accept_events(vcpu);
6319 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
6320 r = -EAGAIN;
6321 goto out;
6322 }
6323
6324 /* re-sync apic's tpr */
6325 if (!irqchip_in_kernel(vcpu->kvm)) {
6326 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
6327 r = -EINVAL;
6328 goto out;
6329 }
6330 }
6331
6332 if (unlikely(vcpu->arch.complete_userspace_io)) {
6333 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
6334 vcpu->arch.complete_userspace_io = NULL;
6335 r = cui(vcpu);
6336 if (r <= 0)
6337 goto out;
6338 } else
6339 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
6340
6341 r = __vcpu_run(vcpu);
6342
6343out:
6344 post_kvm_run_save(vcpu);
6345 if (vcpu->sigset_active)
6346 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
6347
6348 return r;
6349}
6350
6351int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6352{
6353 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
6354 /*
6355  * We are here if userspace calls get_regs() in the middle of
6356  * instruction emulation.  The register state needs to be copied
6357  * back from the emulation context to the vcpu.  Userspace
6358  * shouldn't do that usually, but some badly designed PV devices
6359  * (the vmware backdoor interface) need it to work.
6360  */
6361 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
6362 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6363 }
6364 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
6365 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
6366 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
6367 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
6368 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
6369 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
6370 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
6371 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
6372#ifdef CONFIG_X86_64
6373 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
6374 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
6375 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
6376 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
6377 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
6378 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
6379 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
6380 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
6381#endif
6382
6383 regs->rip = kvm_rip_read(vcpu);
6384 regs->rflags = kvm_get_rflags(vcpu);
6385
6386 return 0;
6387}
6388
6389int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6390{
6391 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6392 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6393
6394 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
6395 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
6396 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
6397 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
6398 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
6399 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
6400 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
6401 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
6402#ifdef CONFIG_X86_64
6403 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
6404 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
6405 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
6406 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
6407 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
6408 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
6409 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
6410 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
6411#endif
6412
6413 kvm_rip_write(vcpu, regs->rip);
6414 kvm_set_rflags(vcpu, regs->rflags);
6415
6416 vcpu->arch.exception.pending = false;
6417
6418 kvm_make_request(KVM_REQ_EVENT, vcpu);
6419
6420 return 0;
6421}
6422
6423void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
6424{
6425 struct kvm_segment cs;
6426
6427 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6428 *db = cs.db;
6429 *l = cs.l;
6430}
6431EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
6432
6433int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
6434 struct kvm_sregs *sregs)
6435{
6436 struct desc_ptr dt;
6437
6438 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6439 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6440 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6441 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6442 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6443 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6444
6445 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6446 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6447
6448 kvm_x86_ops->get_idt(vcpu, &dt);
6449 sregs->idt.limit = dt.size;
6450 sregs->idt.base = dt.address;
6451 kvm_x86_ops->get_gdt(vcpu, &dt);
6452 sregs->gdt.limit = dt.size;
6453 sregs->gdt.base = dt.address;
6454
6455 sregs->cr0 = kvm_read_cr0(vcpu);
6456 sregs->cr2 = vcpu->arch.cr2;
6457 sregs->cr3 = kvm_read_cr3(vcpu);
6458 sregs->cr4 = kvm_read_cr4(vcpu);
6459 sregs->cr8 = kvm_get_cr8(vcpu);
6460 sregs->efer = vcpu->arch.efer;
6461 sregs->apic_base = kvm_get_apic_base(vcpu);
6462
6463 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
6464
6465 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
6466 set_bit(vcpu->arch.interrupt.nr,
6467 (unsigned long *)sregs->interrupt_bitmap);
6468
6469 return 0;
6470}
6471
6472int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
6473 struct kvm_mp_state *mp_state)
6474{
6475 kvm_apic_accept_events(vcpu);
6476 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
6477 vcpu->arch.pv.pv_unhalted)
6478 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
6479 else
6480 mp_state->mp_state = vcpu->arch.mp_state;
6481
6482 return 0;
6483}
6484
6485int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
6486 struct kvm_mp_state *mp_state)
6487{
6488 if (!kvm_vcpu_has_lapic(vcpu) &&
6489 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
6490 return -EINVAL;
6491
6492 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
6493 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
6494 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
6495 } else
6496 vcpu->arch.mp_state = mp_state->mp_state;
6497 kvm_make_request(KVM_REQ_EVENT, vcpu);
6498 return 0;
6499}
6500
6501int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
6502 int reason, bool has_error_code, u32 error_code)
6503{
6504 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6505 int ret;
6506
6507 init_emulate_ctxt(vcpu);
6508
6509 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
6510 has_error_code, error_code);
6511
6512 if (ret)
6513 return EMULATE_FAIL;
6514
6515 kvm_rip_write(vcpu, ctxt->eip);
6516 kvm_set_rflags(vcpu, ctxt->eflags);
6517 kvm_make_request(KVM_REQ_EVENT, vcpu);
6518 return EMULATE_DONE;
6519}
6520EXPORT_SYMBOL_GPL(kvm_task_switch);
6521
6522int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6523 struct kvm_sregs *sregs)
6524{
6525 struct msr_data apic_base_msr;
6526 int mmu_reset_needed = 0;
6527 int pending_vec, max_bits, idx;
6528 struct desc_ptr dt;
6529
6530 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
6531 return -EINVAL;
6532
6533 dt.size = sregs->idt.limit;
6534 dt.address = sregs->idt.base;
6535 kvm_x86_ops->set_idt(vcpu, &dt);
6536 dt.size = sregs->gdt.limit;
6537 dt.address = sregs->gdt.base;
6538 kvm_x86_ops->set_gdt(vcpu, &dt);
6539
6540 vcpu->arch.cr2 = sregs->cr2;
6541 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
6542 vcpu->arch.cr3 = sregs->cr3;
6543 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
6544
6545 kvm_set_cr8(vcpu, sregs->cr8);
6546
6547 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6548 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6549 apic_base_msr.data = sregs->apic_base;
6550 apic_base_msr.host_initiated = true;
6551 kvm_set_apic_base(vcpu, &apic_base_msr);
6552
6553 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6554 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
6555 vcpu->arch.cr0 = sregs->cr0;
6556
6557 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
6558 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
6559 if (sregs->cr4 & X86_CR4_OSXSAVE)
6560 kvm_update_cpuid(vcpu);
6561
6562 idx = srcu_read_lock(&vcpu->kvm->srcu);
6563 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
6564 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6565 mmu_reset_needed = 1;
6566 }
6567 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6568
6569 if (mmu_reset_needed)
6570 kvm_mmu_reset_context(vcpu);
6571
6572 max_bits = KVM_NR_INTERRUPTS;
6573 pending_vec = find_first_bit(
6574 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
6575 if (pending_vec < max_bits) {
6576 kvm_queue_interrupt(vcpu, pending_vec, false);
6577 pr_debug("Set back pending irq %d\n", pending_vec);
6578 }
6579
6580 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6581 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6582 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6583 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6584 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6585 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6586
6587 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6588 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6589
6590 update_cr8_intercept(vcpu);
6591
6592 /* Older userspace won't unhalt the vcpu on reset. */
6593 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
6594 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
6595 !is_protmode(vcpu))
6596 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6597
6598 kvm_make_request(KVM_REQ_EVENT, vcpu);
6599
6600 return 0;
6601}
6602
6603int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
6604 struct kvm_guest_debug *dbg)
6605{
6606 unsigned long rflags;
6607 int i, r;
6608
6609 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
6610 r = -EBUSY;
6611 if (vcpu->arch.exception.pending)
6612 goto out;
6613 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
6614 kvm_queue_exception(vcpu, DB_VECTOR);
6615 else
6616 kvm_queue_exception(vcpu, BP_VECTOR);
6617 }
6618
6619 /*
6620  * Read rflags as long as potentially injected trace flags are
6621  * filtered out.
6622  */
6623 rflags = kvm_get_rflags(vcpu);
6624
6625 vcpu->guest_debug = dbg->control;
6626 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
6627 vcpu->guest_debug = 0;
6628
6629 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6630 for (i = 0; i < KVM_NR_DB_REGS; ++i)
6631 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
6632 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
6633 } else {
6634 for (i = 0; i < KVM_NR_DB_REGS; i++)
6635 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6636 }
6637 kvm_update_dr7(vcpu);
6638
6639 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6640 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
6641 get_segment_base(vcpu, VCPU_SREG_CS);
6642
6643 /*
6644  * Trigger an rflags update that will inject or remove the trace
6645  * flags.
6646  */
6647 kvm_set_rflags(vcpu, rflags);
6648
6649 kvm_x86_ops->update_db_bp_intercept(vcpu);
6650
6651 r = 0;
6652
6653out:
6654
6655 return r;
6656}
6657
6658/*
6659 * Translate a guest virtual address to a guest physical address.
6660 */
6661int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6662 struct kvm_translation *tr)
6663{
6664 unsigned long vaddr = tr->linear_address;
6665 gpa_t gpa;
6666 int idx;
6667
6668 idx = srcu_read_lock(&vcpu->kvm->srcu);
6669 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
6670 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6671 tr->physical_address = gpa;
6672 tr->valid = gpa != UNMAPPED_GVA;
6673 tr->writeable = 1;
6674 tr->usermode = 0;
6675
6676 return 0;
6677}
6678
6679int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6680{
6681 struct i387_fxsave_struct *fxsave =
6682 &vcpu->arch.guest_fpu.state->fxsave;
6683
6684 memcpy(fpu->fpr, fxsave->st_space, 128);
6685 fpu->fcw = fxsave->cwd;
6686 fpu->fsw = fxsave->swd;
6687 fpu->ftwx = fxsave->twd;
6688 fpu->last_opcode = fxsave->fop;
6689 fpu->last_ip = fxsave->rip;
6690 fpu->last_dp = fxsave->rdp;
6691 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
6692
6693 return 0;
6694}
6695
6696int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6697{
6698 struct i387_fxsave_struct *fxsave =
6699 &vcpu->arch.guest_fpu.state->fxsave;
6700
6701 memcpy(fxsave->st_space, fpu->fpr, 128);
6702 fxsave->cwd = fpu->fcw;
6703 fxsave->swd = fpu->fsw;
6704 fxsave->twd = fpu->ftwx;
6705 fxsave->fop = fpu->last_opcode;
6706 fxsave->rip = fpu->last_ip;
6707 fxsave->rdp = fpu->last_dp;
6708 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
6709
6710 return 0;
6711}
6712
6713int fx_init(struct kvm_vcpu *vcpu)
6714{
6715 int err;
6716
6717 err = fpu_alloc(&vcpu->arch.guest_fpu);
6718 if (err)
6719 return err;
6720
6721 fpu_finit(&vcpu->arch.guest_fpu);
6722
6723 /*
6724  * Ensure guest xcr0 is valid for loading
6725  */
6726 vcpu->arch.xcr0 = XSTATE_FP;
6727
6728 vcpu->arch.cr0 |= X86_CR0_ET;
6729
6730 return 0;
6731}
6732EXPORT_SYMBOL_GPL(fx_init);
6733
6734static void fx_free(struct kvm_vcpu *vcpu)
6735{
6736 fpu_free(&vcpu->arch.guest_fpu);
6737}
6738
6739void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6740{
6741 if (vcpu->guest_fpu_loaded)
6742 return;
6743
	/*
	 * Restore all possible states in the guest,
	 * and assume host would use all available bits.
	 * Guest xcr0 would be loaded later.
	 */
6749 kvm_put_guest_xcr0(vcpu);
6750 vcpu->guest_fpu_loaded = 1;
6751 __kernel_fpu_begin();
6752 fpu_restore_checking(&vcpu->arch.guest_fpu);
6753 trace_kvm_fpu(1);
6754}
6755
6756void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6757{
6758 kvm_put_guest_xcr0(vcpu);
6759
6760 if (!vcpu->guest_fpu_loaded)
6761 return;
6762
6763 vcpu->guest_fpu_loaded = 0;
6764 fpu_save_init(&vcpu->arch.guest_fpu);
6765 __kernel_fpu_end();
6766 ++vcpu->stat.fpu_reload;
6767 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6768 trace_kvm_fpu(0);
6769}
6770
6771void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6772{
6773 kvmclock_reset(vcpu);
6774
6775 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6776 fx_free(vcpu);
6777 kvm_x86_ops->vcpu_free(vcpu);
6778}
6779
6780struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6781 unsigned int id)
6782{
6783 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6784 printk_once(KERN_WARNING
6785 "kvm: SMP vm created on host with unstable TSC; "
6786 "guest TSC will not be reliable\n");
6787 return kvm_x86_ops->vcpu_create(kvm, id);
6788}
6789
6790int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6791{
6792 int r;
6793
6794 vcpu->arch.mtrr_state.have_fixed = 1;
6795 r = vcpu_load(vcpu);
6796 if (r)
6797 return r;
6798 kvm_vcpu_reset(vcpu);
6799 kvm_mmu_setup(vcpu);
6800 vcpu_put(vcpu);
6801
6802 return r;
6803}
6804
6805int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6806{
6807 int r;
6808 struct msr_data msr;
6809 struct kvm *kvm = vcpu->kvm;
6810
6811 r = vcpu_load(vcpu);
6812 if (r)
6813 return r;
6814 msr.data = 0x0;
6815 msr.index = MSR_IA32_TSC;
6816 msr.host_initiated = true;
6817 kvm_write_tsc(vcpu, &msr);
6818 vcpu_put(vcpu);
6819
6820 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
6821 KVMCLOCK_SYNC_PERIOD);
6822
6823 return r;
6824}
6825
6826void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6827{
6828 int r;
6829 vcpu->arch.apf.msr_val = 0;
6830
6831 r = vcpu_load(vcpu);
6832 BUG_ON(r);
6833 kvm_mmu_unload(vcpu);
6834 vcpu_put(vcpu);
6835
6836 fx_free(vcpu);
6837 kvm_x86_ops->vcpu_free(vcpu);
6838}
6839
6840void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6841{
6842 atomic_set(&vcpu->arch.nmi_queued, 0);
6843 vcpu->arch.nmi_pending = 0;
6844 vcpu->arch.nmi_injected = false;
6845
6846 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6847 vcpu->arch.dr6 = DR6_FIXED_1;
6848 kvm_update_dr6(vcpu);
6849 vcpu->arch.dr7 = DR7_FIXED_1;
6850 kvm_update_dr7(vcpu);
6851
6852 kvm_make_request(KVM_REQ_EVENT, vcpu);
6853 vcpu->arch.apf.msr_val = 0;
6854 vcpu->arch.st.msr_val = 0;
6855
6856 kvmclock_reset(vcpu);
6857
6858 kvm_clear_async_pf_completion_queue(vcpu);
6859 kvm_async_pf_hash_reset(vcpu);
6860 vcpu->arch.apf.halted = false;
6861
6862 kvm_pmu_reset(vcpu);
6863
6864 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
6865 vcpu->arch.regs_avail = ~0;
6866 vcpu->arch.regs_dirty = ~0;
6867
6868 kvm_x86_ops->vcpu_reset(vcpu);
6869}
6870
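/*
 * A SIPI points the AP at a 4K-aligned real-mode trampoline: the 8-bit
 * vector supplies the code segment (selector = vector << 8, base =
 * vector << 12) and RIP is zeroed.  For example, vector 0x9a starts the
 * vcpu executing at physical address 0x9a000.
 */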
6871void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
6872{
6873 struct kvm_segment cs;
6874
6875 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6876 cs.selector = vector << 8;
6877 cs.base = vector << 12;
6878 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
6879 kvm_rip_write(vcpu, 0);
6880}
6881
6882int kvm_arch_hardware_enable(void *garbage)
6883{
6884 struct kvm *kvm;
6885 struct kvm_vcpu *vcpu;
6886 int i;
6887 int ret;
6888 u64 local_tsc;
6889 u64 max_tsc = 0;
6890 bool stable, backwards_tsc = false;
6891
6892 kvm_shared_msr_cpu_online();
6893 ret = kvm_x86_ops->hardware_enable(garbage);
6894 if (ret != 0)
6895 return ret;
6896
6897 local_tsc = native_read_tsc();
6898 stable = !check_tsc_unstable();
6899 list_for_each_entry(kvm, &vm_list, vm_list) {
6900 kvm_for_each_vcpu(i, vcpu, kvm) {
6901 if (!stable && vcpu->cpu == smp_processor_id())
6902 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6903 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6904 backwards_tsc = true;
6905 if (vcpu->arch.last_host_tsc > max_tsc)
6906 max_tsc = vcpu->arch.last_host_tsc;
6907 }
6908 }
6909 }
6910
	/*
	 * Sometimes, even reliable TSCs go backwards.  This happens on
	 * platforms that reset TSC during suspend or hibernate actions, but
	 * maintain synchronization.  We must compensate.  Fortunately, we can
	 * compensate by detecting the case where a CPU has come back online
	 * without suspend: its TSC, which was stable before, now lags behind
	 * the last TSC some vcpu observed on this host.
	 *
	 * Unfortunately, at this point in CPU bringup we cannot tell how much
	 * real time has elapsed, so the TSC cannot be brought fully up to
	 * date with wall clock time.  Instead, take the maximum TSC value
	 * observed across all vcpus above and accumulate the difference into
	 * each vcpu's tsc_offset_adjustment, which is applied the next time
	 * the vcpu is loaded.  Accumulating (rather than overwriting) copes
	 * with multiple suspend cycles occurring before a vcpu gets a chance
	 * to run again.
	 *
	 * N.B. - this code below runs only on hosts with a stable TSC, as
	 * that is the only way backwards_tsc can be set above.
	 */
6949 if (backwards_tsc) {
6950 u64 delta_cyc = max_tsc - local_tsc;
6951 backwards_tsc_observed = true;
6952 list_for_each_entry(kvm, &vm_list, vm_list) {
6953 kvm_for_each_vcpu(i, vcpu, kvm) {
6954 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6955 vcpu->arch.last_host_tsc = local_tsc;
6956 set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
6957 &vcpu->requests);
6958 }

			/*
			 * We have to disable TSC offset matching.. if you were
			 * booting a VM while issuing an S4 host suspend....
			 * you may have some problem.  Solving this issue is
			 * left as an exercise to the reader.
			 */
6966 kvm->arch.last_tsc_nsec = 0;
6967 kvm->arch.last_tsc_write = 0;
6968 }
6970 }
6971 return 0;
6972}
6973
6974void kvm_arch_hardware_disable(void *garbage)
6975{
6976 kvm_x86_ops->hardware_disable(garbage);
6977 drop_user_return_notifiers(garbage);
6978}
6979
6980int kvm_arch_hardware_setup(void)
6981{
6982 return kvm_x86_ops->hardware_setup();
6983}
6984
6985void kvm_arch_hardware_unsetup(void)
6986{
6987 kvm_x86_ops->hardware_unsetup();
6988}
6989
6990void kvm_arch_check_processor_compat(void *rtn)
6991{
6992 kvm_x86_ops->check_processor_compatibility(rtn);
6993}
6994
6995bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6996{
6997 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6998}
6999
7000struct static_key kvm_no_apic_vcpu __read_mostly;
7001
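/*
 * Note the allocation order below: the fail_* labels unwind in exactly the
 * reverse order, so a failure at any step frees only what has already been
 * set up.
 */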
7002int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7003{
7004 struct page *page;
7005 struct kvm *kvm;
7006 int r;
7007
7008 BUG_ON(vcpu->kvm == NULL);
7009 kvm = vcpu->kvm;
7010
7011 vcpu->arch.pv.pv_unhalted = false;
7012 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
7013 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
7014 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7015 else
7016 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
7017
7018 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
7019 if (!page) {
7020 r = -ENOMEM;
7021 goto fail;
7022 }
7023 vcpu->arch.pio_data = page_address(page);
7024
7025 kvm_set_tsc_khz(vcpu, max_tsc_khz);
7026
7027 r = kvm_mmu_create(vcpu);
7028 if (r < 0)
7029 goto fail_free_pio_data;
7030
7031 if (irqchip_in_kernel(kvm)) {
7032 r = kvm_create_lapic(vcpu);
7033 if (r < 0)
7034 goto fail_mmu_destroy;
7035 } else
7036 static_key_slow_inc(&kvm_no_apic_vcpu);
7037
7038 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
7039 GFP_KERNEL);
7040 if (!vcpu->arch.mce_banks) {
7041 r = -ENOMEM;
7042 goto fail_free_lapic;
7043 }
7044 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
7045
7046 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
7047 r = -ENOMEM;
7048 goto fail_free_mce_banks;
7049 }
7050
7051 r = fx_init(vcpu);
7052 if (r)
7053 goto fail_free_wbinvd_dirty_mask;
7054
7055 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
7056 vcpu->arch.pv_time_enabled = false;
7057
7058 vcpu->arch.guest_supported_xcr0 = 0;
7059 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
7060
7061 kvm_async_pf_hash_reset(vcpu);
7062 kvm_pmu_init(vcpu);
7063
7064 return 0;
7065fail_free_wbinvd_dirty_mask:
7066 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
7067fail_free_mce_banks:
7068 kfree(vcpu->arch.mce_banks);
7069fail_free_lapic:
7070 kvm_free_lapic(vcpu);
7071fail_mmu_destroy:
7072 kvm_mmu_destroy(vcpu);
7073fail_free_pio_data:
7074 free_page((unsigned long)vcpu->arch.pio_data);
7075fail:
7076 return r;
7077}
7078
7079void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
7080{
7081 int idx;
7082
7083 kvm_pmu_destroy(vcpu);
7084 kfree(vcpu->arch.mce_banks);
7085 kvm_free_lapic(vcpu);
7086 idx = srcu_read_lock(&vcpu->kvm->srcu);
7087 kvm_mmu_destroy(vcpu);
7088 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7089 free_page((unsigned long)vcpu->arch.pio_data);
7090 if (!irqchip_in_kernel(vcpu->kvm))
7091 static_key_slow_dec(&kvm_no_apic_vcpu);
7092}
7093
7094int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7095{
7096 if (type)
7097 return -EINVAL;
7098
7099 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
7100 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
7101 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
7102 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
7103
	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
7105 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
7107 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
7108 &kvm->arch.irq_sources_bitmap);
7109
7110 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
7111 mutex_init(&kvm->arch.apic_map_lock);
7112 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
7113
7114 pvclock_update_vm_gtod_copy(kvm);
7115
7116 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
7117 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
7118
7119 return 0;
7120}
7121
7122static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
7123{
7124 int r;
7125 r = vcpu_load(vcpu);
7126 BUG_ON(r);
7127 kvm_mmu_unload(vcpu);
7128 vcpu_put(vcpu);
7129}
7130
7131static void kvm_free_vcpus(struct kvm *kvm)
7132{
7133 unsigned int i;
7134 struct kvm_vcpu *vcpu;
7135
	/*
	 * Unpin any mmu pages first.
	 */
7139 kvm_for_each_vcpu(i, vcpu, kvm) {
7140 kvm_clear_async_pf_completion_queue(vcpu);
7141 kvm_unload_vcpu_mmu(vcpu);
7142 }
7143 kvm_for_each_vcpu(i, vcpu, kvm)
7144 kvm_arch_vcpu_free(vcpu);
7145
7146 mutex_lock(&kvm->lock);
7147 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
7148 kvm->vcpus[i] = NULL;
7149
7150 atomic_set(&kvm->online_vcpus, 0);
7151 mutex_unlock(&kvm->lock);
7152}
7153
7154void kvm_arch_sync_events(struct kvm *kvm)
7155{
7156 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
7157 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
7158 kvm_free_all_assigned_devices(kvm);
7159 kvm_free_pit(kvm);
7160}
7161
7162void kvm_arch_destroy_vm(struct kvm *kvm)
7163{
7164 if (current->mm == kvm->mm) {
		/*
		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
		 * or fd copying.
		 */
7170 struct kvm_userspace_memory_region mem;
7171 memset(&mem, 0, sizeof(mem));
7172 mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
7173 kvm_set_memory_region(kvm, &mem);
7174
7175 mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
7176 kvm_set_memory_region(kvm, &mem);
7177
7178 mem.slot = TSS_PRIVATE_MEMSLOT;
7179 kvm_set_memory_region(kvm, &mem);
7180 }
7181 kvm_iommu_unmap_guest(kvm);
7182 kfree(kvm->arch.vpic);
7183 kfree(kvm->arch.vioapic);
7184 kvm_free_vcpus(kvm);
7185 if (kvm->arch.apic_access_page)
7186 put_page(kvm->arch.apic_access_page);
7187 if (kvm->arch.ept_identity_pagetable)
7188 put_page(kvm->arch.ept_identity_pagetable);
7189 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7190}
7191
7192void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7193 struct kvm_memory_slot *dont)
7194{
7195 int i;
7196
7197 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7198 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
7199 kvm_kvfree(free->arch.rmap[i]);
7200 free->arch.rmap[i] = NULL;
7201 }
7202 if (i == 0)
7203 continue;
7204
7205 if (!dont || free->arch.lpage_info[i - 1] !=
7206 dont->arch.lpage_info[i - 1]) {
7207 kvm_kvfree(free->arch.lpage_info[i - 1]);
7208 free->arch.lpage_info[i - 1] = NULL;
7209 }
7210 }
7211}
7212
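/*
 * For each supported page size, allocate one rmap entry per page of that
 * size covered by the slot, and (for the large-page levels) a write_count
 * used to disable large pages in ranges that cannot be mapped large.  For
 * example, a 4096-page slot aligned on a 2MB boundary needs 4096 rmap
 * entries at level 1 and, at level 2 (512 small pages per large page),
 * gfn_to_index() yields lpages = 8.
 */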
7213int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7214 unsigned long npages)
7215{
7216 int i;
7217
7218 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7219 unsigned long ugfn;
7220 int lpages;
7221 int level = i + 1;
7222
7223 lpages = gfn_to_index(slot->base_gfn + npages - 1,
7224 slot->base_gfn, level) + 1;
7225
7226 slot->arch.rmap[i] =
7227 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
7228 if (!slot->arch.rmap[i])
7229 goto out_free;
7230 if (i == 0)
7231 continue;
7232
7233 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
7234 sizeof(*slot->arch.lpage_info[i - 1]));
7235 if (!slot->arch.lpage_info[i - 1])
7236 goto out_free;
7237
7238 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
7239 slot->arch.lpage_info[i - 1][0].write_count = 1;
7240 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
7241 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
7242 ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot
		 */
7248 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
7249 !kvm_largepages_enabled()) {
7250 unsigned long j;
7251
7252 for (j = 0; j < lpages; ++j)
7253 slot->arch.lpage_info[i - 1][j].write_count = 1;
7254 }
7255 }
7256
7257 return 0;
7258
7259out_free:
7260 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7261 kvm_kvfree(slot->arch.rmap[i]);
7262 slot->arch.rmap[i] = NULL;
7263 if (i == 0)
7264 continue;
7265
7266 kvm_kvfree(slot->arch.lpage_info[i - 1]);
7267 slot->arch.lpage_info[i - 1] = NULL;
7268 }
7269 return -ENOMEM;
7270}
7271
7272void kvm_arch_memslots_updated(struct kvm *kvm)
7273{
	/*
	 * memslots->generation has been incremented.
	 * mmio generation may have reached its maximum value.
	 */
7278 kvm_mmu_invalidate_mmio_sptes(kvm);
7279}
7280
7281int kvm_arch_prepare_memory_region(struct kvm *kvm,
7282 struct kvm_memory_slot *memslot,
7283 struct kvm_userspace_memory_region *mem,
7284 enum kvm_mr_change change)
7285{
	/*
	 * Only private memory slots need to be mapped here since
	 * KVM_SET_MEMORY_REGION ioctl is no longer supported.
	 */
7290 if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
7291 unsigned long userspace_addr;
7292
		/*
		 * MAP_SHARED to prevent internal slot pages from being moved
		 * by fork()/COW.
		 */
7297 userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
7298 PROT_READ | PROT_WRITE,
7299 MAP_SHARED | MAP_ANONYMOUS, 0);
7300
7301 if (IS_ERR((void *)userspace_addr))
7302 return PTR_ERR((void *)userspace_addr);
7303
7304 memslot->userspace_addr = userspace_addr;
7305 }
7306
7307 return 0;
7308}
7309
7310void kvm_arch_commit_memory_region(struct kvm *kvm,
7311 struct kvm_userspace_memory_region *mem,
7312 const struct kvm_memory_slot *old,
7313 enum kvm_mr_change change)
7314{
7316 int nr_mmu_pages = 0;
7317
7318 if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
7319 int ret;
7320
7321 ret = vm_munmap(old->userspace_addr,
7322 old->npages * PAGE_SIZE);
7323 if (ret < 0)
7324 printk(KERN_WARNING
7325 "kvm_vm_ioctl_set_memory_region: "
7326 "failed to munmap memory\n");
7327 }
7328
7329 if (!kvm->arch.n_requested_mmu_pages)
7330 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
7331
7332 if (nr_mmu_pages)
7333 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
7334
	/*
	 * Write protect all pages for dirty logging.
	 * Existing largepage mappings are destroyed here and new ones will
	 * not be created until the end of the logging.
	 */
7339 if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
7340 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
7341}
7342
7343void kvm_arch_flush_shadow_all(struct kvm *kvm)
7344{
7345 kvm_mmu_invalidate_zap_all_pages(kvm);
7346}
7347
7348void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
7349 struct kvm_memory_slot *slot)
7350{
7351 kvm_mmu_invalidate_zap_all_pages(kvm);
7352}
7353
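/*
 * A vcpu is runnable if it is RUNNABLE and not halted waiting for an async
 * page fault, or if it has an event pending that requires a wakeup:
 * completed async page faults, APIC events (e.g. INIT/SIPI), a PV unhalt
 * request, queued NMIs, or a deliverable interrupt.
 */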
7354int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
7355{
7356 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7357 kvm_x86_ops->check_nested_events(vcpu, false);
7358
7359 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7360 !vcpu->arch.apf.halted)
7361 || !list_empty_careful(&vcpu->async_pf.done)
7362 || kvm_apic_has_events(vcpu)
7363 || vcpu->arch.pv.pv_unhalted
7364 || atomic_read(&vcpu->arch.nmi_queued) ||
7365 (kvm_arch_interrupt_allowed(vcpu) &&
7366 kvm_cpu_has_interrupt(vcpu));
7367}
7368
7369int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
7370{
7371 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
7372}
7373
7374int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
7375{
7376 return kvm_x86_ops->interrupt_allowed(vcpu);
7377}
7378
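/*
 * A "linear RIP" is the RIP plus the CS segment base, i.e. a
 * segment-independent instruction address.  It is used to check whether the
 * vcpu still sits at the instruction for which single-stepping was armed
 * (vcpu->arch.singlestep_rip), regardless of how CS:RIP encode it.
 */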
7379bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
7380{
7381 unsigned long current_rip = kvm_rip_read(vcpu) +
7382 get_segment_base(vcpu, VCPU_SREG_CS);
7383
7384 return current_rip == linear_rip;
7385}
7386EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
7387
7388unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
7389{
7390 unsigned long rflags;
7391
7392 rflags = kvm_x86_ops->get_rflags(vcpu);
7393 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7394 rflags &= ~X86_EFLAGS_TF;
7395 return rflags;
7396}
7397EXPORT_SYMBOL_GPL(kvm_get_rflags);
7398
7399void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7400{
7401 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
7402 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
7403 rflags |= X86_EFLAGS_TF;
7404 kvm_x86_ops->set_rflags(vcpu, rflags);
7405 kvm_make_request(KVM_REQ_EVENT, vcpu);
7406}
7407EXPORT_SYMBOL_GPL(kvm_set_rflags);
7408
7409void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
7410{
7411 int r;
7412
7413 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
7414 work->wakeup_all)
7415 return;
7416
7417 r = kvm_mmu_reload(vcpu);
7418 if (unlikely(r))
7419 return;
7420
7421 if (!vcpu->arch.mmu.direct_map &&
7422 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
7423 return;
7424
7425 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
7426}
7427
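/*
 * vcpu->arch.apf.gfns[] is a small open-addressed hash table with linear
 * probing that remembers which guest frames have an async page fault
 * outstanding.  Empty slots hold ~0 (an impossible gfn); that is the value
 * kvm_async_pf_hash_reset() fills the table with.  ASYNC_PF_PER_VCPU is a
 * power of two, so probes can wrap using a simple mask.
 */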
7428static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
7429{
7430 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
7431}
7432
7433static inline u32 kvm_async_pf_next_probe(u32 key)
7434{
7435 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
7436}
7437
7438static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7439{
7440 u32 key = kvm_async_pf_hash_fn(gfn);
7441
7442 while (vcpu->arch.apf.gfns[key] != ~0)
7443 key = kvm_async_pf_next_probe(key);
7444
7445 vcpu->arch.apf.gfns[key] = gfn;
7446}
7447
7448static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
7449{
7450 int i;
7451 u32 key = kvm_async_pf_hash_fn(gfn);
7452
7453 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
7454 (vcpu->arch.apf.gfns[key] != gfn &&
7455 vcpu->arch.apf.gfns[key] != ~0); i++)
7456 key = kvm_async_pf_next_probe(key);
7457
7458 return key;
7459}
7460
7461bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7462{
7463 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
7464}
7465
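/*
 * Deleting from an open-addressed table cannot simply clear the slot, or
 * the probe chains running through it would break.  Instead, walk forward
 * from the vacated slot and back-shift any later entry whose home bucket
 * lies cyclically outside the gap (classic open-addressing deletion).
 */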
7466static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7467{
7468 u32 i, j, k;
7469
7470 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
7471 while (true) {
7472 vcpu->arch.apf.gfns[i] = ~0;
7473 do {
7474 j = kvm_async_pf_next_probe(j);
7475 if (vcpu->arch.apf.gfns[j] == ~0)
7476 return;
7477 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k lies cyclically in ]i,j]
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 */
7483 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
7484 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
7485 i = j;
7486 }
7487}
7488
7489static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
7490{
7492 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
7493 sizeof(val));
7494}
7495
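/*
 * Async page fault delivery: when the guest has enabled it through
 * MSR_KVM_ASYNC_PF_EN (see Documentation/virtual/kvm/msr.txt), the reason
 * code is written into the guest-registered slot and a #PF is injected
 * whose CR2 holds the token identifying the fault.  Otherwise the vcpu is
 * simply halted until the page becomes available.
 */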
7496void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
7497 struct kvm_async_pf *work)
7498{
7499 struct x86_exception fault;
7500
7501 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
7502 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
7503
7504 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
7505 (vcpu->arch.apf.send_user_only &&
7506 kvm_x86_ops->get_cpl(vcpu) == 0))
7507 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
7508 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
7509 fault.vector = PF_VECTOR;
7510 fault.error_code_valid = true;
7511 fault.error_code = 0;
7512 fault.nested_page_fault = false;
7513 fault.address = work->arch.token;
7514 kvm_inject_page_fault(vcpu, &fault);
7515 }
7516}
7517
7518void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7519 struct kvm_async_pf *work)
7520{
7521 struct x86_exception fault;
7522
7523 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7524 if (work->wakeup_all)
7525 work->arch.token = ~0;
7526 else
7527 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
7528
7529 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
7530 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
7531 fault.vector = PF_VECTOR;
7532 fault.error_code_valid = true;
7533 fault.error_code = 0;
7534 fault.nested_page_fault = false;
7535 fault.address = work->arch.token;
7536 kvm_inject_page_fault(vcpu, &fault);
7537 }
7538 vcpu->arch.apf.halted = false;
7539 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7540}
7541
7542bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7543{
7544 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
7545 return true;
7546 else
7547 return !kvm_event_needs_reinjection(vcpu) &&
7548 kvm_x86_ops->interrupt_allowed(vcpu);
7549}
7550
7551void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
7552{
7553 atomic_inc(&kvm->arch.noncoherent_dma_count);
7554}
7555EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
7556
7557void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
7558{
7559 atomic_dec(&kvm->arch.noncoherent_dma_count);
7560}
7561EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
7562
7563bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
7564{
7565 return atomic_read(&kvm->arch.noncoherent_dma_count);
7566}
7567EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
7568
7569EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7570EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7571EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
7572EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
7573EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
7574EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
7575EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
7576EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
7577EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
7578EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
7579EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7580EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7581EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
7582